LCOV - code coverage report
Current view: top level - third_party/aom/av1/common - idct.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 883 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 79 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #include <math.h>
      13             : 
      14             : #include "./aom_dsp_rtcd.h"
      15             : #include "./av1_rtcd.h"
      16             : #include "aom_dsp/inv_txfm.h"
      17             : #include "aom_ports/mem.h"
      18             : #include "av1/common/av1_inv_txfm1d_cfg.h"
      19             : #include "av1/common/blockd.h"
      20             : #include "av1/common/enums.h"
      21             : #include "av1/common/idct.h"
      22             : 
      23           0 : int av1_get_tx_scale(const TX_SIZE tx_size) {
      24           0 :   if (txsize_sqr_up_map[tx_size] == TX_32X32) return 1;
      25             : #if CONFIG_TX64X64
      26             :   else if (txsize_sqr_up_map[tx_size] == TX_64X64)
      27             :     return 2;
      28             : #endif  // CONFIG_TX64X64
      29             :   else
      30           0 :     return 0;
      31             : }
      32             : 
      33             : // NOTE: The implementation of all inverses need to be aware of the fact
      34             : // that input and output could be the same buffer.
      35             : 
      36             : #if CONFIG_EXT_TX
      37           0 : static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
      38             :   int i;
      39           0 :   for (i = 0; i < 4; ++i)
      40           0 :     output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
      41           0 : }
      42             : 
      43           0 : static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
      44             :   int i;
      45           0 :   for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
      46           0 : }
      47             : 
      48           0 : static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
      49             :   int i;
      50           0 :   for (i = 0; i < 16; ++i)
      51           0 :     output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
      52           0 : }
      53             : 
      54           0 : static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
      55             :   int i;
      56           0 :   for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
      57           0 : }
      58             : 
      59             : #if CONFIG_TX64X64
      60             : static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
      61             :   int i;
      62             :   for (i = 0; i < 64; ++i)
      63             :     output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
      64             : }
      65             : #endif  // CONFIG_TX64X64
      66             : #endif  // CONFIG_EXT_TX
      67             : 
      68             : // For use in lieu of ADST
      69           0 : static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
      70             :   int i;
      71             :   tran_low_t inputhalf[16];
      72             :   // Multiply input by sqrt(2)
      73           0 :   for (i = 0; i < 16; ++i) {
      74           0 :     inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
      75             :   }
      76           0 :   for (i = 0; i < 16; ++i) {
      77           0 :     output[i] = input[16 + i] * 4;
      78             :   }
      79           0 :   aom_idct16_c(inputhalf, output + 16);
      80             :   // Note overall scaling factor is 4 times orthogonal
      81           0 : }
      82             : 
      83             : #if CONFIG_TX64X64
      84             : static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
      85             :   int32_t in[64], out[64];
      86             :   int i;
      87             :   for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
      88             :   av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
      89             :   for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
      90             : }
      91             : 
      92             : static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
      93             :   int32_t in[64], out[64];
      94             :   int i;
      95             :   for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
      96             :   av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
      97             :   for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
      98             : }
      99             : 
     100             : // For use in lieu of ADST
     101             : static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
     102             :   int i;
     103             :   tran_low_t inputhalf[32];
     104             :   // Multiply input by sqrt(2)
     105             :   for (i = 0; i < 32; ++i) {
     106             :     inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
     107             :   }
     108             :   for (i = 0; i < 32; ++i) {
     109             :     output[i] = (tran_low_t)dct_const_round_shift(input[32 + i] * 4 * Sqrt2);
     110             :   }
     111             :   aom_idct32_c(inputhalf, output + 32);
     112             :   // Note overall scaling factor is 4 * sqrt(2)  times orthogonal
     113             : }
     114             : #endif  // CONFIG_TX64X64
     115             : 
     116             : #if CONFIG_HIGHBITDEPTH
     117             : #if CONFIG_EXT_TX
     118             : // TODO(sarahparker) these functions will be removed once the highbitdepth
     119             : // codepath works properly for rectangular transforms. They have almost
     120             : // identical versions in av1_inv_txfm1d.c, but those are currently only
     121             : // being used for square transforms.
     122           0 : static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output,
     123             :                             int bd) {
     124             :   int i;
     125           0 :   for (i = 0; i < 4; ++i)
     126           0 :     output[i] = HIGHBD_WRAPLOW(dct_const_round_shift(input[i] * Sqrt2), bd);
     127           0 : }
     128             : 
     129           0 : static void highbd_iidtx8_c(const tran_low_t *input, tran_low_t *output,
     130             :                             int bd) {
     131             :   int i;
     132             :   (void)bd;
     133           0 :   for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
     134           0 : }
     135             : 
     136           0 : static void highbd_iidtx16_c(const tran_low_t *input, tran_low_t *output,
     137             :                              int bd) {
     138             :   int i;
     139           0 :   for (i = 0; i < 16; ++i)
     140           0 :     output[i] = HIGHBD_WRAPLOW(dct_const_round_shift(input[i] * 2 * Sqrt2), bd);
     141           0 : }
     142             : 
     143           0 : static void highbd_iidtx32_c(const tran_low_t *input, tran_low_t *output,
     144             :                              int bd) {
     145             :   int i;
     146             :   (void)bd;
     147           0 :   for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
     148           0 : }
     149             : #endif  // CONFIG_EXT_TX
     150             : 
     151           0 : static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output,
     152             :                                   int bd) {
     153             :   int i;
     154             :   tran_low_t inputhalf[16];
     155             :   // Multiply input by sqrt(2)
     156           0 :   for (i = 0; i < 16; ++i) {
     157           0 :     inputhalf[i] = HIGHBD_WRAPLOW(dct_const_round_shift(input[i] * Sqrt2), bd);
     158             :   }
     159           0 :   for (i = 0; i < 16; ++i) {
     160           0 :     output[i] = input[16 + i] * 4;
     161             :   }
     162           0 :   aom_highbd_idct16_c(inputhalf, output + 16, bd);
     163             :   // Note overall scaling factor is 4 times orthogonal
     164           0 : }
     165             : 
     166             : #if CONFIG_EXT_TX
     167             : #if CONFIG_TX64X64
     168             : static void highbd_iidtx64_c(const tran_low_t *input, tran_low_t *output,
     169             :                              int bd) {
     170             :   int i;
     171             :   for (i = 0; i < 64; ++i)
     172             :     output[i] = HIGHBD_WRAPLOW(dct_const_round_shift(input[i] * 4 * Sqrt2), bd);
     173             : }
     174             : #endif  // CONFIG_TX64X64
     175             : #endif  // CONFIG_EXT_TX
     176             : 
     177             : #if CONFIG_TX64X64
     178             : // For use in lieu of ADST
     179             : static void highbd_ihalfright64_c(const tran_low_t *input, tran_low_t *output,
     180             :                                   int bd) {
     181             :   int i;
     182             :   tran_low_t inputhalf[32];
     183             :   // Multiply input by sqrt(2)
     184             :   for (i = 0; i < 32; ++i) {
     185             :     inputhalf[i] = HIGHBD_WRAPLOW(dct_const_round_shift(input[i] * Sqrt2), bd);
     186             :   }
     187             :   for (i = 0; i < 32; ++i) {
     188             :     output[i] =
     189             :         HIGHBD_WRAPLOW(dct_const_round_shift(input[32 + i] * 4 * Sqrt2), bd);
     190             :   }
     191             :   aom_highbd_idct32_c(inputhalf, output + 32, bd);
     192             :   // Note overall scaling factor is 4 * sqrt(2)  times orthogonal
     193             : }
     194             : 
     195             : static void highbd_idct64_col_c(const tran_low_t *input, tran_low_t *output,
     196             :                                 int bd) {
     197             :   int32_t in[64], out[64];
     198             :   int i;
     199             :   (void)bd;
     200             :   for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
     201             :   av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
     202             :   for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
     203             : }
     204             : 
     205             : static void highbd_idct64_row_c(const tran_low_t *input, tran_low_t *output,
     206             :                                 int bd) {
     207             :   int32_t in[64], out[64];
     208             :   int i;
     209             :   (void)bd;
     210             :   for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
     211             :   av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
     212             :   for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
     213             : }
     214             : #endif  // CONFIG_TX64X64
     215             : #endif  // CONFIG_HIGHBITDEPTH
     216             : 
     217             : // Inverse identity transform and add.
     218             : #if CONFIG_EXT_TX
     219           0 : static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     220             :                            int bs, int tx_type) {
     221             :   int r, c;
     222           0 :   const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
     223           0 :   if (tx_type == IDTX) {
     224           0 :     for (r = 0; r < bs; ++r) {
     225           0 :       for (c = 0; c < bs; ++c)
     226           0 :         dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
     227           0 :       dest += stride;
     228           0 :       input += bs;
     229             :     }
     230             :   }
     231           0 : }
     232             : #endif  // CONFIG_EXT_TX
     233             : 
     234             : #define FLIPUD_PTR(dest, stride, size)       \
     235             :   do {                                       \
     236             :     (dest) = (dest) + ((size)-1) * (stride); \
     237             :     (stride) = -(stride);                    \
     238             :   } while (0)
     239             : 
     240             : #if CONFIG_EXT_TX
     241           0 : static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
     242             :                                int *sstride, int tx_type, int sizey,
     243             :                                int sizex) {
     244             :   // Note that the transpose of src will be added to dst. In order to LR
     245             :   // flip the addends (in dst coordinates), we UD flip the src. To UD flip
     246             :   // the addends, we UD flip the dst.
     247           0 :   switch (tx_type) {
     248             :     case DCT_DCT:
     249             :     case ADST_DCT:
     250             :     case DCT_ADST:
     251             :     case ADST_ADST:
     252             :     case IDTX:
     253             :     case V_DCT:
     254             :     case H_DCT:
     255             :     case V_ADST:
     256           0 :     case H_ADST: break;
     257             :     case FLIPADST_DCT:
     258             :     case FLIPADST_ADST:
     259             :     case V_FLIPADST:
     260             :       // flip UD
     261           0 :       FLIPUD_PTR(*dst, *dstride, sizey);
     262           0 :       break;
     263             :     case DCT_FLIPADST:
     264             :     case ADST_FLIPADST:
     265             :     case H_FLIPADST:
     266             :       // flip LR
     267           0 :       FLIPUD_PTR(*src, *sstride, sizex);
     268           0 :       break;
     269             :     case FLIPADST_FLIPADST:
     270             :       // flip UD
     271           0 :       FLIPUD_PTR(*dst, *dstride, sizey);
     272             :       // flip LR
     273           0 :       FLIPUD_PTR(*src, *sstride, sizex);
     274           0 :       break;
     275           0 :     default: assert(0); break;
     276             :   }
     277           0 : }
     278             : #endif  // CONFIG_EXT_TX
     279             : 
     280             : #if CONFIG_HIGHBITDEPTH
     281             : #if CONFIG_EXT_TX
     282           0 : static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
     283             :                                   int stride, int bs, int tx_type, int bd) {
     284             :   int r, c;
     285           0 :   const int shift = bs < 32 ? 3 : 2;
     286           0 :   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
     287             : 
     288           0 :   if (tx_type == IDTX) {
     289           0 :     for (r = 0; r < bs; ++r) {
     290           0 :       for (c = 0; c < bs; ++c)
     291           0 :         dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
     292           0 :       dest += stride;
     293           0 :       input += bs;
     294             :     }
     295             :   }
     296           0 : }
     297             : 
     298           0 : static void maybe_flip_strides16(uint16_t **dst, int *dstride, tran_low_t **src,
     299             :                                  int *sstride, int tx_type, int sizey,
     300             :                                  int sizex) {
     301             :   // Note that the transpose of src will be added to dst. In order to LR
     302             :   // flip the addends (in dst coordinates), we UD flip the src. To UD flip
     303             :   // the addends, we UD flip the dst.
     304           0 :   switch (tx_type) {
     305             :     case DCT_DCT:
     306             :     case ADST_DCT:
     307             :     case DCT_ADST:
     308             :     case ADST_ADST:
     309             :     case IDTX:
     310             :     case V_DCT:
     311             :     case H_DCT:
     312             :     case V_ADST:
     313           0 :     case H_ADST: break;
     314             :     case FLIPADST_DCT:
     315             :     case FLIPADST_ADST:
     316             :     case V_FLIPADST:
     317             :       // flip UD
     318           0 :       FLIPUD_PTR(*dst, *dstride, sizey);
     319           0 :       break;
     320             :     case DCT_FLIPADST:
     321             :     case ADST_FLIPADST:
     322             :     case H_FLIPADST:
     323             :       // flip LR
     324           0 :       FLIPUD_PTR(*src, *sstride, sizex);
     325           0 :       break;
     326             :     case FLIPADST_FLIPADST:
     327             :       // flip UD
     328           0 :       FLIPUD_PTR(*dst, *dstride, sizey);
     329             :       // flip LR
     330           0 :       FLIPUD_PTR(*src, *sstride, sizex);
     331           0 :       break;
     332           0 :     default: assert(0); break;
     333             :   }
     334           0 : }
     335             : #endif  // CONFIG_EXT_TX
     336             : #endif  // CONFIG_HIGHBITDEPTH
     337             : 
     338           0 : void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     339             :                          int tx_type) {
     340             :   static const transform_2d IHT_4[] = {
     341             :     { aom_idct4_c, aom_idct4_c },    // DCT_DCT  = 0
     342             :     { aom_iadst4_c, aom_idct4_c },   // ADST_DCT = 1
     343             :     { aom_idct4_c, aom_iadst4_c },   // DCT_ADST = 2
     344             :     { aom_iadst4_c, aom_iadst4_c },  // ADST_ADST = 3
     345             : #if CONFIG_EXT_TX
     346             :     { aom_iadst4_c, aom_idct4_c },   // FLIPADST_DCT
     347             :     { aom_idct4_c, aom_iadst4_c },   // DCT_FLIPADST
     348             :     { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_FLIPADST
     349             :     { aom_iadst4_c, aom_iadst4_c },  // ADST_FLIPADST
     350             :     { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_ADST
     351             :     { iidtx4_c, iidtx4_c },          // IDTX
     352             :     { aom_idct4_c, iidtx4_c },       // V_DCT
     353             :     { iidtx4_c, aom_idct4_c },       // H_DCT
     354             :     { aom_iadst4_c, iidtx4_c },      // V_ADST
     355             :     { iidtx4_c, aom_iadst4_c },      // H_ADST
     356             :     { aom_iadst4_c, iidtx4_c },      // V_FLIPADST
     357             :     { iidtx4_c, aom_iadst4_c },      // H_FLIPADST
     358             : #endif                               // CONFIG_EXT_TX
     359             :   };
     360             : 
     361             :   int i, j;
     362             :   tran_low_t tmp[4][4];
     363             :   tran_low_t out[4][4];
     364           0 :   tran_low_t *outp = &out[0][0];
     365           0 :   int outstride = 4;
     366             : 
     367             :   // inverse transform row vectors
     368           0 :   for (i = 0; i < 4; ++i) {
     369           0 :     IHT_4[tx_type].rows(input, out[i]);
     370           0 :     input += 4;
     371             :   }
     372             : 
     373             :   // transpose
     374           0 :   for (i = 0; i < 4; i++) {
     375           0 :     for (j = 0; j < 4; j++) {
     376           0 :       tmp[j][i] = out[i][j];
     377             :     }
     378             :   }
     379             : 
     380             :   // inverse transform column vectors
     381           0 :   for (i = 0; i < 4; ++i) {
     382           0 :     IHT_4[tx_type].cols(tmp[i], out[i]);
     383             :   }
     384             : 
     385             : #if CONFIG_EXT_TX
     386           0 :   maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
     387             : #endif
     388             : 
     389             :   // Sum with the destination
     390           0 :   for (i = 0; i < 4; ++i) {
     391           0 :     for (j = 0; j < 4; ++j) {
     392           0 :       int d = i * stride + j;
     393           0 :       int s = j * outstride + i;
     394           0 :       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
     395             :     }
     396             :   }
     397           0 : }
     398             : 
     399           0 : void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     400             :                          int tx_type) {
     401             :   static const transform_2d IHT_4x8[] = {
     402             :     { aom_idct8_c, aom_idct4_c },    // DCT_DCT
     403             :     { aom_iadst8_c, aom_idct4_c },   // ADST_DCT
     404             :     { aom_idct8_c, aom_iadst4_c },   // DCT_ADST
     405             :     { aom_iadst8_c, aom_iadst4_c },  // ADST_ADST
     406             : #if CONFIG_EXT_TX
     407             :     { aom_iadst8_c, aom_idct4_c },   // FLIPADST_DCT
     408             :     { aom_idct8_c, aom_iadst4_c },   // DCT_FLIPADST
     409             :     { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_FLIPADST
     410             :     { aom_iadst8_c, aom_iadst4_c },  // ADST_FLIPADST
     411             :     { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_ADST
     412             :     { iidtx8_c, iidtx4_c },          // IDTX
     413             :     { aom_idct8_c, iidtx4_c },       // V_DCT
     414             :     { iidtx8_c, aom_idct4_c },       // H_DCT
     415             :     { aom_iadst8_c, iidtx4_c },      // V_ADST
     416             :     { iidtx8_c, aom_iadst4_c },      // H_ADST
     417             :     { aom_iadst8_c, iidtx4_c },      // V_FLIPADST
     418             :     { iidtx8_c, aom_iadst4_c },      // H_FLIPADST
     419             : #endif
     420             :   };
     421             : 
     422           0 :   const int n = 4;
     423           0 :   const int n2 = 8;
     424             :   int i, j;
     425             :   tran_low_t out[4][8], tmp[4][8], outtmp[4];
     426           0 :   tran_low_t *outp = &out[0][0];
     427           0 :   int outstride = n2;
     428             : 
     429             :   // inverse transform row vectors and transpose
     430           0 :   for (i = 0; i < n2; ++i) {
     431           0 :     IHT_4x8[tx_type].rows(input, outtmp);
     432           0 :     for (j = 0; j < n; ++j)
     433           0 :       tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
     434           0 :     input += n;
     435             :   }
     436             : 
     437             :   // inverse transform column vectors
     438           0 :   for (i = 0; i < n; ++i) {
     439           0 :     IHT_4x8[tx_type].cols(tmp[i], out[i]);
     440             :   }
     441             : 
     442             : #if CONFIG_EXT_TX
     443           0 :   maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
     444             : #endif
     445             : 
     446             :   // Sum with the destination
     447           0 :   for (i = 0; i < n2; ++i) {
     448           0 :     for (j = 0; j < n; ++j) {
     449           0 :       int d = i * stride + j;
     450           0 :       int s = j * outstride + i;
     451           0 :       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
     452             :     }
     453             :   }
     454           0 : }
     455             : 
     456           0 : void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     457             :                          int tx_type) {
     458             :   static const transform_2d IHT_8x4[] = {
     459             :     { aom_idct4_c, aom_idct8_c },    // DCT_DCT
     460             :     { aom_iadst4_c, aom_idct8_c },   // ADST_DCT
     461             :     { aom_idct4_c, aom_iadst8_c },   // DCT_ADST
     462             :     { aom_iadst4_c, aom_iadst8_c },  // ADST_ADST
     463             : #if CONFIG_EXT_TX
     464             :     { aom_iadst4_c, aom_idct8_c },   // FLIPADST_DCT
     465             :     { aom_idct4_c, aom_iadst8_c },   // DCT_FLIPADST
     466             :     { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_FLIPADST
     467             :     { aom_iadst4_c, aom_iadst8_c },  // ADST_FLIPADST
     468             :     { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_ADST
     469             :     { iidtx4_c, iidtx8_c },          // IDTX
     470             :     { aom_idct4_c, iidtx8_c },       // V_DCT
     471             :     { iidtx4_c, aom_idct8_c },       // H_DCT
     472             :     { aom_iadst4_c, iidtx8_c },      // V_ADST
     473             :     { iidtx4_c, aom_iadst8_c },      // H_ADST
     474             :     { aom_iadst4_c, iidtx8_c },      // V_FLIPADST
     475             :     { iidtx4_c, aom_iadst8_c },      // H_FLIPADST
     476             : #endif
     477             :   };
     478           0 :   const int n = 4;
     479           0 :   const int n2 = 8;
     480             : 
     481             :   int i, j;
     482             :   tran_low_t out[8][4], tmp[8][4], outtmp[8];
     483           0 :   tran_low_t *outp = &out[0][0];
     484           0 :   int outstride = n;
     485             : 
     486             :   // inverse transform row vectors and transpose
     487           0 :   for (i = 0; i < n; ++i) {
     488           0 :     IHT_8x4[tx_type].rows(input, outtmp);
     489           0 :     for (j = 0; j < n2; ++j)
     490           0 :       tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
     491           0 :     input += n2;
     492             :   }
     493             : 
     494             :   // inverse transform column vectors
     495           0 :   for (i = 0; i < n2; ++i) {
     496           0 :     IHT_8x4[tx_type].cols(tmp[i], out[i]);
     497             :   }
     498             : 
     499             : #if CONFIG_EXT_TX
     500           0 :   maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
     501             : #endif
     502             : 
     503             :   // Sum with the destination
     504           0 :   for (i = 0; i < n; ++i) {
     505           0 :     for (j = 0; j < n2; ++j) {
     506           0 :       int d = i * stride + j;
     507           0 :       int s = j * outstride + i;
     508           0 :       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
     509             :     }
     510             :   }
     511           0 : }
     512             : 
     513           0 : void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     514             :                           int tx_type) {
     515             :   static const transform_2d IHT_4x16[] = {
     516             :     { aom_idct16_c, aom_idct4_c },    // DCT_DCT
     517             :     { aom_iadst16_c, aom_idct4_c },   // ADST_DCT
     518             :     { aom_idct16_c, aom_iadst4_c },   // DCT_ADST
     519             :     { aom_iadst16_c, aom_iadst4_c },  // ADST_ADST
     520             : #if CONFIG_EXT_TX
     521             :     { aom_iadst16_c, aom_idct4_c },   // FLIPADST_DCT
     522             :     { aom_idct16_c, aom_iadst4_c },   // DCT_FLIPADST
     523             :     { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_FLIPADST
     524             :     { aom_iadst16_c, aom_iadst4_c },  // ADST_FLIPADST
     525             :     { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_ADST
     526             :     { iidtx16_c, iidtx4_c },          // IDTX
     527             :     { aom_idct16_c, iidtx4_c },       // V_DCT
     528             :     { iidtx16_c, aom_idct4_c },       // H_DCT
     529             :     { aom_iadst16_c, iidtx4_c },      // V_ADST
     530             :     { iidtx16_c, aom_iadst4_c },      // H_ADST
     531             :     { aom_iadst16_c, iidtx4_c },      // V_FLIPADST
     532             :     { iidtx16_c, aom_iadst4_c },      // H_FLIPADST
     533             : #endif
     534             :   };
     535             : 
     536           0 :   const int n = 4;
     537           0 :   const int n4 = 16;
     538             :   int i, j;
     539             :   tran_low_t out[4][16], tmp[4][16], outtmp[4];
     540           0 :   tran_low_t *outp = &out[0][0];
     541           0 :   int outstride = n4;
     542             : 
     543             :   // inverse transform row vectors and transpose
     544           0 :   for (i = 0; i < n4; ++i) {
     545           0 :     IHT_4x16[tx_type].rows(input, outtmp);
     546           0 :     for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
     547           0 :     input += n;
     548             :   }
     549             : 
     550             :   // inverse transform column vectors
     551           0 :   for (i = 0; i < n; ++i) IHT_4x16[tx_type].cols(tmp[i], out[i]);
     552             : 
     553             : #if CONFIG_EXT_TX
     554           0 :   maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
     555             : #endif
     556             : 
     557             :   // Sum with the destination
     558           0 :   for (i = 0; i < n4; ++i) {
     559           0 :     for (j = 0; j < n; ++j) {
     560           0 :       int d = i * stride + j;
     561           0 :       int s = j * outstride + i;
     562           0 :       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
     563             :     }
     564             :   }
     565           0 : }
     566             : 
     567           0 : void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     568             :                           int tx_type) {  // Inverse 2-D transform of a 4-row x 16-column coefficient block; the result is added into dest.
     569             :   static const transform_2d IHT_16x4[] = {  // Indexed by tx_type. NOTE(review): each pair appears to be { cols, rows } -- confirm against transform_2d.
     570             :     { aom_idct4_c, aom_idct16_c },    // DCT_DCT
     571             :     { aom_iadst4_c, aom_idct16_c },   // ADST_DCT
     572             :     { aom_idct4_c, aom_iadst16_c },   // DCT_ADST
     573             :     { aom_iadst4_c, aom_iadst16_c },  // ADST_ADST
     574             : #if CONFIG_EXT_TX
     575             :     { aom_iadst4_c, aom_idct16_c },   // FLIPADST_DCT
     576             :     { aom_idct4_c, aom_iadst16_c },   // DCT_FLIPADST
     577             :     { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_FLIPADST
     578             :     { aom_iadst4_c, aom_iadst16_c },  // ADST_FLIPADST
     579             :     { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_ADST
     580             :     { iidtx4_c, iidtx16_c },          // IDTX
     581             :     { aom_idct4_c, iidtx16_c },       // V_DCT
     582             :     { iidtx4_c, aom_idct16_c },       // H_DCT
     583             :     { aom_iadst4_c, iidtx16_c },      // V_ADST
     584             :     { iidtx4_c, aom_iadst16_c },      // H_ADST
     585             :     { aom_iadst4_c, iidtx16_c },      // V_FLIPADST
     586             :     { iidtx4_c, aom_iadst16_c },      // H_FLIPADST
     587             : #endif
     588             :   };
     589           0 :   const int n = 4;    // number of coefficient rows read from input
     590           0 :   const int n4 = 16;  // coefficients per input row
     591             : 
     592             :   int i, j;
     593             :   tran_low_t out[16][4], tmp[16][4], outtmp[16];  // outtmp: one transformed row; tmp: row results stored transposed; out: column-pass results
     594           0 :   tran_low_t *outp = &out[0][0];  // flat view of out[][]; passed by address to maybe_flip_strides under CONFIG_EXT_TX
     595           0 :   int outstride = n;              // row pitch of out[][] in elements
     596             : 
     597             :   // Row pass: inverse transform each 16-coefficient input row and store it transposed in tmp.
     598           0 :   for (i = 0; i < n; ++i) {
     599           0 :     IHT_16x4[tx_type].rows(input, outtmp);
     600           0 :     for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
     601           0 :     input += n4;  // advance to the next input row
     602             :   }
     603             : 
     604             :   // Column pass: transform each 4-entry row of tmp (one column of the block) into the matching row of out.
     605           0 :   for (i = 0; i < n4; ++i) IHT_16x4[tx_type].cols(tmp[i], out[i]);
     606             : 
     607             : #if CONFIG_EXT_TX
     608           0 :   maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);  // NOTE(review): helper is outside this view; presumably redirects dest/outp and their strides for flipped tx_types -- confirm.
     609             : #endif
     610             : 
     611             :   // Add the residual to dest: out is read transposed, rounded with ROUND_POWER_OF_TWO(x, 5), and combined via clip_pixel_add.
     612           0 :   for (i = 0; i < n; ++i) {
     613           0 :     for (j = 0; j < n4; ++j) {
     614           0 :       int d = i * stride + j;     // dest offset for row i, column j
     615           0 :       int s = j * outstride + i;  // out offset: row j, column i (transposed access)
     616           0 :       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
     617             :     }
     618             :   }
     619           0 : }
     620             : 
     621           0 : void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     622             :                            int tx_type) {  // Inverse 2-D transform of a 16-row x 8-column coefficient block; the result is added into dest.
     623             :   static const transform_2d IHT_8x16[] = {  // Indexed by tx_type. NOTE(review): each pair appears to be { cols, rows } -- confirm against transform_2d.
     624             :     { aom_idct16_c, aom_idct8_c },    // DCT_DCT
     625             :     { aom_iadst16_c, aom_idct8_c },   // ADST_DCT
     626             :     { aom_idct16_c, aom_iadst8_c },   // DCT_ADST
     627             :     { aom_iadst16_c, aom_iadst8_c },  // ADST_ADST
     628             : #if CONFIG_EXT_TX
     629             :     { aom_iadst16_c, aom_idct8_c },   // FLIPADST_DCT
     630             :     { aom_idct16_c, aom_iadst8_c },   // DCT_FLIPADST
     631             :     { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_FLIPADST
     632             :     { aom_iadst16_c, aom_iadst8_c },  // ADST_FLIPADST
     633             :     { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_ADST
     634             :     { iidtx16_c, iidtx8_c },          // IDTX
     635             :     { aom_idct16_c, iidtx8_c },       // V_DCT
     636             :     { iidtx16_c, aom_idct8_c },       // H_DCT
     637             :     { aom_iadst16_c, iidtx8_c },      // V_ADST
     638             :     { iidtx16_c, aom_iadst8_c },      // H_ADST
     639             :     { aom_iadst16_c, iidtx8_c },      // V_FLIPADST
     640             :     { iidtx16_c, aom_iadst8_c },      // H_FLIPADST
     641             : #endif
     642             :   };
     643             : 
     644           0 :   const int n = 8;    // coefficients per input row
     645           0 :   const int n2 = 16;  // number of coefficient rows read from input
     646             :   int i, j;
     647             :   tran_low_t out[8][16], tmp[8][16], outtmp[8];  // outtmp: one transformed row; tmp: row results stored transposed; out: column-pass results
     648           0 :   tran_low_t *outp = &out[0][0];  // flat view of out[][]; passed by address to maybe_flip_strides under CONFIG_EXT_TX
     649           0 :   int outstride = n2;             // row pitch of out[][] in elements
     650             : 
     651             :   // Row pass: inverse transform each 8-coefficient input row, rescale it, and store it transposed in tmp.
     652           0 :   for (i = 0; i < n2; ++i) {
     653           0 :     IHT_8x16[tx_type].rows(input, outtmp);
     654           0 :     for (j = 0; j < n; ++j)
     655           0 :       tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);  // NOTE(review): Sqrt2 scaling presumably normalizes the 2:1 rectangular block -- confirm.
     656           0 :     input += n;  // advance to the next input row
     657             :   }
     658             : 
     659             :   // Column pass: transform each 16-entry row of tmp (one column of the block) into the matching row of out.
     660           0 :   for (i = 0; i < n; ++i) {
     661           0 :     IHT_8x16[tx_type].cols(tmp[i], out[i]);
     662             :   }
     663             : 
     664             : #if CONFIG_EXT_TX
     665           0 :   maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);  // NOTE(review): helper is outside this view; presumably redirects dest/outp and their strides for flipped tx_types -- confirm.
     666             : #endif
     667             : 
     668             :   // Add the residual to dest: out is read transposed, rounded with ROUND_POWER_OF_TWO(x, 6), and combined via clip_pixel_add.
     669           0 :   for (i = 0; i < n2; ++i) {
     670           0 :     for (j = 0; j < n; ++j) {
     671           0 :       int d = i * stride + j;     // dest offset for row i, column j
     672           0 :       int s = j * outstride + i;  // out offset: row j, column i (transposed access)
     673           0 :       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
     674             :     }
     675             :   }
     676           0 : }
     677             : 
     678           0 : void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     679             :                            int tx_type) {  // Inverse 2-D transform of an 8-row x 16-column coefficient block; the result is added into dest.
     680             :   static const transform_2d IHT_16x8[] = {  // Indexed by tx_type. NOTE(review): each pair appears to be { cols, rows } -- confirm against transform_2d.
     681             :     { aom_idct8_c, aom_idct16_c },    // DCT_DCT
     682             :     { aom_iadst8_c, aom_idct16_c },   // ADST_DCT
     683             :     { aom_idct8_c, aom_iadst16_c },   // DCT_ADST
     684             :     { aom_iadst8_c, aom_iadst16_c },  // ADST_ADST
     685             : #if CONFIG_EXT_TX
     686             :     { aom_iadst8_c, aom_idct16_c },   // FLIPADST_DCT
     687             :     { aom_idct8_c, aom_iadst16_c },   // DCT_FLIPADST
     688             :     { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_FLIPADST
     689             :     { aom_iadst8_c, aom_iadst16_c },  // ADST_FLIPADST
     690             :     { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_ADST
     691             :     { iidtx8_c, iidtx16_c },          // IDTX
     692             :     { aom_idct8_c, iidtx16_c },       // V_DCT
     693             :     { iidtx8_c, aom_idct16_c },       // H_DCT
     694             :     { aom_iadst8_c, iidtx16_c },      // V_ADST
     695             :     { iidtx8_c, aom_iadst16_c },      // H_ADST
     696             :     { aom_iadst8_c, iidtx16_c },      // V_FLIPADST
     697             :     { iidtx8_c, aom_iadst16_c },      // H_FLIPADST
     698             : #endif
     699             :   };
     700           0 :   const int n = 8;    // number of coefficient rows read from input
     701           0 :   const int n2 = 16;  // coefficients per input row
     702             : 
     703             :   int i, j;
     704             :   tran_low_t out[16][8], tmp[16][8], outtmp[16];  // outtmp: one transformed row; tmp: row results stored transposed; out: column-pass results
     705           0 :   tran_low_t *outp = &out[0][0];  // flat view of out[][]; passed by address to maybe_flip_strides under CONFIG_EXT_TX
     706           0 :   int outstride = n;              // row pitch of out[][] in elements
     707             : 
     708             :   // Row pass: inverse transform each 16-coefficient input row, rescale it, and store it transposed in tmp.
     709           0 :   for (i = 0; i < n; ++i) {
     710           0 :     IHT_16x8[tx_type].rows(input, outtmp);
     711           0 :     for (j = 0; j < n2; ++j)
     712           0 :       tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);  // NOTE(review): Sqrt2 scaling presumably normalizes the 2:1 rectangular block -- confirm.
     713           0 :     input += n2;  // advance to the next input row
     714             :   }
     715             : 
     716             :   // Column pass: transform each 8-entry row of tmp (one column of the block) into the matching row of out.
     717           0 :   for (i = 0; i < n2; ++i) {
     718           0 :     IHT_16x8[tx_type].cols(tmp[i], out[i]);
     719             :   }
     720             : 
     721             : #if CONFIG_EXT_TX
     722           0 :   maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);  // NOTE(review): helper is outside this view; presumably redirects dest/outp and their strides for flipped tx_types -- confirm.
     723             : #endif
     724             : 
     725             :   // Add the residual to dest: out is read transposed, rounded with ROUND_POWER_OF_TWO(x, 6), and combined via clip_pixel_add.
     726           0 :   for (i = 0; i < n; ++i) {
     727           0 :     for (j = 0; j < n2; ++j) {
     728           0 :       int d = i * stride + j;     // dest offset for row i, column j
     729           0 :       int s = j * outstride + i;  // out offset: row j, column i (transposed access)
     730           0 :       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
     731             :     }
     732             :   }
     733           0 : }
     734             : 
     735           0 : void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     736             :                            int tx_type) {  // Inverse 2-D transform of a 32-row x 8-column coefficient block; the result is added into dest.
     737             :   static const transform_2d IHT_8x32[] = {  // Indexed by tx_type. NOTE(review): each pair appears to be { cols, rows } -- confirm against transform_2d.
     738             :     { aom_idct32_c, aom_idct8_c },     // DCT_DCT
     739             :     { ihalfright32_c, aom_idct8_c },   // ADST_DCT
     740             :     { aom_idct32_c, aom_iadst8_c },    // DCT_ADST
     741             :     { ihalfright32_c, aom_iadst8_c },  // ADST_ADST
     742             : #if CONFIG_EXT_TX
     743             :     { ihalfright32_c, aom_idct8_c },   // FLIPADST_DCT
     744             :     { aom_idct32_c, aom_iadst8_c },    // DCT_FLIPADST
     745             :     { ihalfright32_c, aom_iadst8_c },  // FLIPADST_FLIPADST
     746             :     { ihalfright32_c, aom_iadst8_c },  // ADST_FLIPADST
     747             :     { ihalfright32_c, aom_iadst8_c },  // FLIPADST_ADST
     748             :     { iidtx32_c, iidtx8_c },           // IDTX
     749             :     { aom_idct32_c, iidtx8_c },        // V_DCT
     750             :     { iidtx32_c, aom_idct8_c },        // H_DCT
     751             :     { ihalfright32_c, iidtx8_c },      // V_ADST
     752             :     { iidtx32_c, aom_iadst8_c },       // H_ADST
     753             :     { ihalfright32_c, iidtx8_c },      // V_FLIPADST
     754             :     { iidtx32_c, aom_iadst8_c },       // H_FLIPADST
     755             : #endif
     756             :   };
     757             : 
     758           0 :   const int n = 8;    // coefficients per input row
     759           0 :   const int n4 = 32;  // number of coefficient rows read from input
     760             :   int i, j;
     761             :   tran_low_t out[8][32], tmp[8][32], outtmp[8];  // outtmp: one transformed row; tmp: row results stored transposed; out: column-pass results
     762           0 :   tran_low_t *outp = &out[0][0];  // flat view of out[][]; passed by address to maybe_flip_strides under CONFIG_EXT_TX
     763           0 :   int outstride = n4;             // row pitch of out[][] in elements
     764             : 
     765             :   // Row pass: inverse transform each 8-coefficient input row and store it transposed in tmp (no rescaling here).
     766           0 :   for (i = 0; i < n4; ++i) {
     767           0 :     IHT_8x32[tx_type].rows(input, outtmp);
     768           0 :     for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
     769           0 :     input += n;  // advance to the next input row
     770             :   }
     771             : 
     772             :   // Column pass: transform each 32-entry row of tmp (one column of the block) into the matching row of out.
     773           0 :   for (i = 0; i < n; ++i) IHT_8x32[tx_type].cols(tmp[i], out[i]);
     774             : 
     775             : #if CONFIG_EXT_TX
     776           0 :   maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);  // NOTE(review): helper is outside this view; presumably redirects dest/outp and their strides for flipped tx_types -- confirm.
     777             : #endif
     778             : 
     779             :   // Add the residual to dest: out is read transposed, rounded with ROUND_POWER_OF_TWO(x, 6), and combined via clip_pixel_add.
     780           0 :   for (i = 0; i < n4; ++i) {
     781           0 :     for (j = 0; j < n; ++j) {
     782           0 :       int d = i * stride + j;     // dest offset for row i, column j
     783           0 :       int s = j * outstride + i;  // out offset: row j, column i (transposed access)
     784           0 :       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
     785             :     }
     786             :   }
     787           0 : }
     788             : 
     789           0 : void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     790             :                            int tx_type) {  // Inverse 2-D transform of an 8-row x 32-column coefficient block; the result is added into dest.
     791             :   static const transform_2d IHT_32x8[] = {  // Indexed by tx_type. NOTE(review): each pair appears to be { cols, rows } -- confirm against transform_2d.
     792             :     { aom_idct8_c, aom_idct32_c },     // DCT_DCT
     793             :     { aom_iadst8_c, aom_idct32_c },    // ADST_DCT
     794             :     { aom_idct8_c, ihalfright32_c },   // DCT_ADST
     795             :     { aom_iadst8_c, ihalfright32_c },  // ADST_ADST
     796             : #if CONFIG_EXT_TX
     797             :     { aom_iadst8_c, aom_idct32_c },    // FLIPADST_DCT
     798             :     { aom_idct8_c, ihalfright32_c },   // DCT_FLIPADST
     799             :     { aom_iadst8_c, ihalfright32_c },  // FLIPADST_FLIPADST
     800             :     { aom_iadst8_c, ihalfright32_c },  // ADST_FLIPADST
     801             :     { aom_iadst8_c, ihalfright32_c },  // FLIPADST_ADST
     802             :     { iidtx8_c, iidtx32_c },           // IDTX
     803             :     { aom_idct8_c, iidtx32_c },        // V_DCT
     804             :     { iidtx8_c, aom_idct32_c },        // H_DCT
     805             :     { aom_iadst8_c, iidtx32_c },       // V_ADST
     806             :     { iidtx8_c, ihalfright32_c },      // H_ADST
     807             :     { aom_iadst8_c, iidtx32_c },       // V_FLIPADST
     808             :     { iidtx8_c, ihalfright32_c },      // H_FLIPADST
     809             : #endif
     810             :   };
     811           0 :   const int n = 8;    // number of coefficient rows read from input
     812           0 :   const int n4 = 32;  // coefficients per input row
     813             : 
     814             :   int i, j;
     815             :   tran_low_t out[32][8], tmp[32][8], outtmp[32];  // outtmp: one transformed row; tmp: row results stored transposed; out: column-pass results
     816           0 :   tran_low_t *outp = &out[0][0];  // flat view of out[][]; passed by address to maybe_flip_strides under CONFIG_EXT_TX
     817           0 :   int outstride = n;              // row pitch of out[][] in elements
     818             : 
     819             :   // Row pass: inverse transform each 32-coefficient input row and store it transposed in tmp (no rescaling here).
     820           0 :   for (i = 0; i < n; ++i) {
     821           0 :     IHT_32x8[tx_type].rows(input, outtmp);
     822           0 :     for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
     823           0 :     input += n4;  // advance to the next input row
     824             :   }
     825             : 
     826             :   // Column pass: transform each 8-entry row of tmp (one column of the block) into the matching row of out.
     827           0 :   for (i = 0; i < n4; ++i) IHT_32x8[tx_type].cols(tmp[i], out[i]);
     828             : 
     829             : #if CONFIG_EXT_TX
     830           0 :   maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);  // NOTE(review): helper is outside this view; presumably redirects dest/outp and their strides for flipped tx_types -- confirm.
     831             : #endif
     832             : 
     833             :   // Add the residual to dest: out is read transposed, rounded with ROUND_POWER_OF_TWO(x, 6), and combined via clip_pixel_add.
     834           0 :   for (i = 0; i < n; ++i) {
     835           0 :     for (j = 0; j < n4; ++j) {
     836           0 :       int d = i * stride + j;     // dest offset for row i, column j
     837           0 :       int s = j * outstride + i;  // out offset: row j, column i (transposed access)
     838           0 :       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
     839             :     }
     840             :   }
     841           0 : }
     842             : 
     843           0 : void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     844             :                             int tx_type) {  // Inverse 2-D transform of a 32-row x 16-column coefficient block; the result is added into dest.
     845             :   static const transform_2d IHT_16x32[] = {  // Indexed by tx_type. NOTE(review): each pair appears to be { cols, rows } -- confirm against transform_2d.
     846             :     { aom_idct32_c, aom_idct16_c },     // DCT_DCT
     847             :     { ihalfright32_c, aom_idct16_c },   // ADST_DCT
     848             :     { aom_idct32_c, aom_iadst16_c },    // DCT_ADST
     849             :     { ihalfright32_c, aom_iadst16_c },  // ADST_ADST
     850             : #if CONFIG_EXT_TX
     851             :     { ihalfright32_c, aom_idct16_c },   // FLIPADST_DCT
     852             :     { aom_idct32_c, aom_iadst16_c },    // DCT_FLIPADST
     853             :     { ihalfright32_c, aom_iadst16_c },  // FLIPADST_FLIPADST
     854             :     { ihalfright32_c, aom_iadst16_c },  // ADST_FLIPADST
     855             :     { ihalfright32_c, aom_iadst16_c },  // FLIPADST_ADST
     856             :     { iidtx32_c, iidtx16_c },           // IDTX
     857             :     { aom_idct32_c, iidtx16_c },        // V_DCT
     858             :     { iidtx32_c, aom_idct16_c },        // H_DCT
     859             :     { ihalfright32_c, iidtx16_c },      // V_ADST
     860             :     { iidtx32_c, aom_iadst16_c },       // H_ADST
     861             :     { ihalfright32_c, iidtx16_c },      // V_FLIPADST
     862             :     { iidtx32_c, aom_iadst16_c },       // H_FLIPADST
     863             : #endif
     864             :   };
     865             : 
     866           0 :   const int n = 16;   // coefficients per input row
     867           0 :   const int n2 = 32;  // number of coefficient rows read from input
     868             :   int i, j;
     869             :   tran_low_t out[16][32], tmp[16][32], outtmp[16];  // outtmp: one transformed row; tmp: row results stored transposed; out: column-pass results
     870           0 :   tran_low_t *outp = &out[0][0];  // flat view of out[][]; passed by address to maybe_flip_strides under CONFIG_EXT_TX
     871           0 :   int outstride = n2;             // row pitch of out[][] in elements
     872             : 
     873             :   // Row pass: inverse transform each 16-coefficient input row, rescale it, and store it transposed in tmp.
     874           0 :   for (i = 0; i < n2; ++i) {
     875           0 :     IHT_16x32[tx_type].rows(input, outtmp);
     876           0 :     for (j = 0; j < n; ++j)
     877           0 :       tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);  // NOTE(review): Sqrt2 scaling presumably normalizes the 2:1 rectangular block -- confirm.
     878           0 :     input += n;  // advance to the next input row
     879             :   }
     880             : 
     881             :   // Column pass: transform each 32-entry row of tmp (one column of the block) into the matching row of out.
     882           0 :   for (i = 0; i < n; ++i) {
     883           0 :     IHT_16x32[tx_type].cols(tmp[i], out[i]);
     884             :   }
     885             : 
     886             : #if CONFIG_EXT_TX
     887           0 :   maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);  // NOTE(review): helper is outside this view; presumably redirects dest/outp and their strides for flipped tx_types -- confirm.
     888             : #endif
     889             : 
     890             :   // Add the residual to dest: out is read transposed, rounded with ROUND_POWER_OF_TWO(x, 6), and combined via clip_pixel_add.
     891           0 :   for (i = 0; i < n2; ++i) {
     892           0 :     for (j = 0; j < n; ++j) {
     893           0 :       int d = i * stride + j;     // dest offset for row i, column j
     894           0 :       int s = j * outstride + i;  // out offset: row j, column i (transposed access)
     895           0 :       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
     896             :     }
     897             :   }
     898           0 : }
     899             : 
     900           0 : void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     901             :                             int tx_type) {  // Inverse 2-D transform of a 16-row x 32-column coefficient block; the result is added into dest.
     902             :   static const transform_2d IHT_32x16[] = {  // Indexed by tx_type. NOTE(review): each pair appears to be { cols, rows } -- confirm against transform_2d.
     903             :     { aom_idct16_c, aom_idct32_c },     // DCT_DCT
     904             :     { aom_iadst16_c, aom_idct32_c },    // ADST_DCT
     905             :     { aom_idct16_c, ihalfright32_c },   // DCT_ADST
     906             :     { aom_iadst16_c, ihalfright32_c },  // ADST_ADST
     907             : #if CONFIG_EXT_TX
     908             :     { aom_iadst16_c, aom_idct32_c },    // FLIPADST_DCT
     909             :     { aom_idct16_c, ihalfright32_c },   // DCT_FLIPADST
     910             :     { aom_iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
     911             :     { aom_iadst16_c, ihalfright32_c },  // ADST_FLIPADST
     912             :     { aom_iadst16_c, ihalfright32_c },  // FLIPADST_ADST
     913             :     { iidtx16_c, iidtx32_c },           // IDTX
     914             :     { aom_idct16_c, iidtx32_c },        // V_DCT
     915             :     { iidtx16_c, aom_idct32_c },        // H_DCT
     916             :     { aom_iadst16_c, iidtx32_c },       // V_ADST
     917             :     { iidtx16_c, ihalfright32_c },      // H_ADST
     918             :     { aom_iadst16_c, iidtx32_c },       // V_FLIPADST
     919             :     { iidtx16_c, ihalfright32_c },      // H_FLIPADST
     920             : #endif
     921             :   };
     922           0 :   const int n = 16;   // number of coefficient rows read from input
     923           0 :   const int n2 = 32;  // coefficients per input row
     924             : 
     925             :   int i, j;
     926             :   tran_low_t out[32][16], tmp[32][16], outtmp[32];  // outtmp: one transformed row; tmp: row results stored transposed; out: column-pass results
     927           0 :   tran_low_t *outp = &out[0][0];  // flat view of out[][]; passed by address to maybe_flip_strides under CONFIG_EXT_TX
     928           0 :   int outstride = n;              // row pitch of out[][] in elements
     929             : 
     930             :   // Row pass: inverse transform each 32-coefficient input row, rescale it, and store it transposed in tmp.
     931           0 :   for (i = 0; i < n; ++i) {
     932           0 :     IHT_32x16[tx_type].rows(input, outtmp);
     933           0 :     for (j = 0; j < n2; ++j)
     934           0 :       tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);  // NOTE(review): Sqrt2 scaling presumably normalizes the 2:1 rectangular block -- confirm.
     935           0 :     input += n2;  // advance to the next input row
     936             :   }
     937             : 
     938             :   // Column pass: transform each 16-entry row of tmp (one column of the block) into the matching row of out.
     939           0 :   for (i = 0; i < n2; ++i) {
     940           0 :     IHT_32x16[tx_type].cols(tmp[i], out[i]);
     941             :   }
     942             : 
     943             : #if CONFIG_EXT_TX
     944           0 :   maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);  // NOTE(review): helper is outside this view; presumably redirects dest/outp and their strides for flipped tx_types -- confirm.
     945             : #endif
     946             : 
     947             :   // Add the residual to dest: out is read transposed, rounded with ROUND_POWER_OF_TWO(x, 6), and combined via clip_pixel_add.
     948           0 :   for (i = 0; i < n; ++i) {
     949           0 :     for (j = 0; j < n2; ++j) {
     950           0 :       int d = i * stride + j;     // dest offset for row i, column j
     951           0 :       int s = j * outstride + i;  // out offset: row j, column i (transposed access)
     952           0 :       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
     953             :     }
     954             :   }
     955           0 : }
     956             : 
     957           0 : void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     958             :                          int tx_type) {  // Inverse 2-D transform of an 8x8 coefficient block; the result is added into dest.
     959             :   static const transform_2d IHT_8[] = {  // Indexed by tx_type; each entry supplies the .cols and .rows 1-D transforms used below.
     960             :     { aom_idct8_c, aom_idct8_c },    // DCT_DCT  = 0
     961             :     { aom_iadst8_c, aom_idct8_c },   // ADST_DCT = 1
     962             :     { aom_idct8_c, aom_iadst8_c },   // DCT_ADST = 2
     963             :     { aom_iadst8_c, aom_iadst8_c },  // ADST_ADST = 3
     964             : #if CONFIG_EXT_TX
     965             :     { aom_iadst8_c, aom_idct8_c },   // FLIPADST_DCT
     966             :     { aom_idct8_c, aom_iadst8_c },   // DCT_FLIPADST
     967             :     { aom_iadst8_c, aom_iadst8_c },  // FLIPADST_FLIPADST
     968             :     { aom_iadst8_c, aom_iadst8_c },  // ADST_FLIPADST
     969             :     { aom_iadst8_c, aom_iadst8_c },  // FLIPADST_ADST
     970             :     { iidtx8_c, iidtx8_c },          // IDTX
     971             :     { aom_idct8_c, iidtx8_c },       // V_DCT
     972             :     { iidtx8_c, aom_idct8_c },       // H_DCT
     973             :     { aom_iadst8_c, iidtx8_c },      // V_ADST
     974             :     { iidtx8_c, aom_iadst8_c },      // H_ADST
     975             :     { aom_iadst8_c, iidtx8_c },      // V_FLIPADST
     976             :     { iidtx8_c, aom_iadst8_c },      // H_FLIPADST
     977             : #endif                               // CONFIG_EXT_TX
     978             :   };
     979             : 
     980             :   int i, j;
     981             :   tran_low_t tmp[8][8];  // transposed row-pass results, input to the column pass
     982             :   tran_low_t out[8][8];  // holds row-pass results first, then is overwritten with column-pass results
     983           0 :   tran_low_t *outp = &out[0][0];  // flat view of out[][]; passed by address to maybe_flip_strides under CONFIG_EXT_TX
     984           0 :   int outstride = 8;              // row pitch of out[][] in elements
     985             : 
     986             :   // Row pass: inverse transform each 8-coefficient input row into the matching row of out.
     987           0 :   for (i = 0; i < 8; ++i) {
     988           0 :     IHT_8[tx_type].rows(input, out[i]);
     989           0 :     input += 8;  // advance to the next input row
     990             :   }
     991             : 
     992             :   // Transpose out into tmp so that each row of tmp holds one column of the row-pass result.
     993           0 :   for (i = 0; i < 8; i++) {
     994           0 :     for (j = 0; j < 8; j++) {
     995           0 :       tmp[j][i] = out[i][j];
     996             :     }
     997             :   }
     998             : 
     999             :   // Column pass: transform each row of tmp (one column of the block) into the matching row of out.
    1000           0 :   for (i = 0; i < 8; ++i) {
    1001           0 :     IHT_8[tx_type].cols(tmp[i], out[i]);
    1002             :   }
    1003             : 
    1004             : #if CONFIG_EXT_TX
    1005           0 :   maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8);  // NOTE(review): helper is outside this view; presumably redirects dest/outp and their strides for flipped tx_types -- confirm.
    1006             : #endif
    1007             : 
    1008             :   // Add the residual to dest: out is read transposed, rounded with ROUND_POWER_OF_TWO(x, 5), and combined via clip_pixel_add.
    1009           0 :   for (i = 0; i < 8; ++i) {
    1010           0 :     for (j = 0; j < 8; ++j) {
    1011           0 :       int d = i * stride + j;     // dest offset for row i, column j
    1012           0 :       int s = j * outstride + i;  // out offset: row j, column i (transposed access)
    1013           0 :       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    1014             :     }
    1015             :   }
    1016           0 : }
    1017             : 
    1018           0 : void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
    1019             :                             int tx_type) {  // Inverse 2-D transform of a 16x16 coefficient block; the result is added into dest.
    1020             :   static const transform_2d IHT_16[] = {  // Indexed by tx_type; each entry supplies the .cols and .rows 1-D transforms used below.
    1021             :     { aom_idct16_c, aom_idct16_c },    // DCT_DCT  = 0
    1022             :     { aom_iadst16_c, aom_idct16_c },   // ADST_DCT = 1
    1023             :     { aom_idct16_c, aom_iadst16_c },   // DCT_ADST = 2
    1024             :     { aom_iadst16_c, aom_iadst16_c },  // ADST_ADST = 3
    1025             : #if CONFIG_EXT_TX
    1026             :     { aom_iadst16_c, aom_idct16_c },   // FLIPADST_DCT
    1027             :     { aom_idct16_c, aom_iadst16_c },   // DCT_FLIPADST
    1028             :     { aom_iadst16_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    1029             :     { aom_iadst16_c, aom_iadst16_c },  // ADST_FLIPADST
    1030             :     { aom_iadst16_c, aom_iadst16_c },  // FLIPADST_ADST
    1031             :     { iidtx16_c, iidtx16_c },          // IDTX
    1032             :     { aom_idct16_c, iidtx16_c },       // V_DCT
    1033             :     { iidtx16_c, aom_idct16_c },       // H_DCT
    1034             :     { aom_iadst16_c, iidtx16_c },      // V_ADST
    1035             :     { iidtx16_c, aom_iadst16_c },      // H_ADST
    1036             :     { aom_iadst16_c, iidtx16_c },      // V_FLIPADST
    1037             :     { iidtx16_c, aom_iadst16_c },      // H_FLIPADST
    1038             : #endif                                 // CONFIG_EXT_TX
    1039             :   };
    1040             : 
    1041             :   int i, j;
    1042             :   tran_low_t tmp[16][16];  // transposed row-pass results, input to the column pass
    1043             :   tran_low_t out[16][16];  // holds row-pass results first, then is overwritten with column-pass results
    1044           0 :   tran_low_t *outp = &out[0][0];  // flat view of out[][]; passed by address to maybe_flip_strides under CONFIG_EXT_TX
    1045           0 :   int outstride = 16;             // row pitch of out[][] in elements
    1046             : 
    1047             :   // Row pass: inverse transform each 16-coefficient input row into the matching row of out.
    1048           0 :   for (i = 0; i < 16; ++i) {
    1049           0 :     IHT_16[tx_type].rows(input, out[i]);
    1050           0 :     input += 16;  // advance to the next input row
    1051             :   }
    1052             : 
    1053             :   // Transpose out into tmp so that each row of tmp holds one column of the row-pass result.
    1054           0 :   for (i = 0; i < 16; i++) {
    1055           0 :     for (j = 0; j < 16; j++) {
    1056           0 :       tmp[j][i] = out[i][j];
    1057             :     }
    1058             :   }
    1059             : 
    1060             :   // Column pass: transform each row of tmp (one column of the block) into the matching row of out.
    1061           0 :   for (i = 0; i < 16; ++i) {
    1062           0 :     IHT_16[tx_type].cols(tmp[i], out[i]);
    1063             :   }
    1064             : 
    1065             : #if CONFIG_EXT_TX
    1066           0 :   maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16, 16);  // NOTE(review): helper is outside this view; presumably redirects dest/outp and their strides for flipped tx_types -- confirm.
    1067             : #endif
    1068             : 
    1069             :   // Add the residual to dest: out is read transposed, rounded with ROUND_POWER_OF_TWO(x, 6), and combined via clip_pixel_add.
    1070           0 :   for (i = 0; i < 16; ++i) {
    1071           0 :     for (j = 0; j < 16; ++j) {
    1072           0 :       int d = i * stride + j;     // dest offset for row i, column j
    1073           0 :       int s = j * outstride + i;  // out offset: row j, column i (transposed access)
    1074           0 :       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    1075             :     }
    1076             :   }
    1077           0 : }
    1078             : 
    1079             : #if CONFIG_EXT_TX
    1080           0 : void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
    1081             :                              int tx_type) {  // Inverse 2-D transform of a 32x32 coefficient block; the result is added into dest. Compiled only when CONFIG_EXT_TX is set.
    1082             :   static const transform_2d IHT_32[] = {  // Indexed by tx_type; each entry supplies the .cols and .rows 1-D transforms used below.
    1083             :     { aom_idct32_c, aom_idct32_c },      // DCT_DCT
    1084             :     { ihalfright32_c, aom_idct32_c },    // ADST_DCT
    1085             :     { aom_idct32_c, ihalfright32_c },    // DCT_ADST
    1086             :     { ihalfright32_c, ihalfright32_c },  // ADST_ADST
    1087             :     { ihalfright32_c, aom_idct32_c },    // FLIPADST_DCT
    1088             :     { aom_idct32_c, ihalfright32_c },    // DCT_FLIPADST
    1089             :     { ihalfright32_c, ihalfright32_c },  // FLIPADST_FLIPADST
    1090             :     { ihalfright32_c, ihalfright32_c },  // ADST_FLIPADST
    1091             :     { ihalfright32_c, ihalfright32_c },  // FLIPADST_ADST
    1092             :     { iidtx32_c, iidtx32_c },            // IDTX
    1093             :     { aom_idct32_c, iidtx32_c },         // V_DCT
    1094             :     { iidtx32_c, aom_idct32_c },         // H_DCT
    1095             :     { ihalfright32_c, iidtx32_c },       // V_ADST
    1096             :     { iidtx32_c, ihalfright32_c },       // H_ADST
    1097             :     { ihalfright32_c, iidtx32_c },       // V_FLIPADST
    1098             :     { iidtx32_c, ihalfright32_c },       // H_FLIPADST
    1099             :   };
    1100             : 
    1101             :   int i, j;
    1102             :   tran_low_t tmp[32][32];  // transposed row-pass results, input to the column pass
    1103             :   tran_low_t out[32][32];  // holds row-pass results first, then is overwritten with column-pass results
    1104           0 :   tran_low_t *outp = &out[0][0];  // flat view of out[][]; passed by address to maybe_flip_strides
    1105           0 :   int outstride = 32;             // row pitch of out[][] in elements
    1106             : 
    1107             :   // Row pass: inverse transform each 32-coefficient input row into the matching row of out.
    1108           0 :   for (i = 0; i < 32; ++i) {
    1109           0 :     IHT_32[tx_type].rows(input, out[i]);
    1110           0 :     input += 32;  // advance to the next input row
    1111             :   }
    1112             : 
    1113             :   // Transpose out into tmp so that each row of tmp holds one column of the row-pass result.
    1114           0 :   for (i = 0; i < 32; i++) {
    1115           0 :     for (j = 0; j < 32; j++) {
    1116           0 :       tmp[j][i] = out[i][j];
    1117             :     }
    1118             :   }
    1119             : 
    1120             :   // Column pass: transform each row of tmp (one column of the block) into the matching row of out.
    1121           0 :   for (i = 0; i < 32; ++i) {
    1122           0 :     IHT_32[tx_type].cols(tmp[i], out[i]);
    1123             :   }
    1124             : 
    1125           0 :   maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32, 32);  // NOTE(review): helper is outside this view; presumably redirects dest/outp and their strides for flipped tx_types -- confirm.
    1126             : 
    1127             :   // Add the residual to dest: out is read transposed, rounded with ROUND_POWER_OF_TWO(x, 6), and combined via clip_pixel_add.
    1128           0 :   for (i = 0; i < 32; ++i) {
    1129           0 :     for (j = 0; j < 32; ++j) {
    1130           0 :       int d = i * stride + j;     // dest offset for row i, column j
    1131           0 :       int s = j * outstride + i;  // out offset: row j, column i (transposed access)
    1132           0 :       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    1133             :     }
    1134             :   }
    1135           0 : }
    1136             : #endif  // CONFIG_EXT_TX
    1137             : 
    1138             : #if CONFIG_TX64X64
    1139             : void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
    1140             :                              int tx_type) {
    1141             :   static const transform_2d IHT_64[] = {
    1142             :     { idct64_col_c, idct64_row_c },      // DCT_DCT
    1143             :     { ihalfright64_c, idct64_row_c },    // ADST_DCT
    1144             :     { idct64_col_c, ihalfright64_c },    // DCT_ADST
    1145             :     { ihalfright64_c, ihalfright64_c },  // ADST_ADST
    1146             : #if CONFIG_EXT_TX
    1147             :     { ihalfright64_c, idct64_row_c },    // FLIPADST_DCT
    1148             :     { idct64_col_c, ihalfright64_c },    // DCT_FLIPADST
    1149             :     { ihalfright64_c, ihalfright64_c },  // FLIPADST_FLIPADST
    1150             :     { ihalfright64_c, ihalfright64_c },  // ADST_FLIPADST
    1151             :     { ihalfright64_c, ihalfright64_c },  // FLIPADST_ADST
    1152             :     { iidtx64_c, iidtx64_c },            // IDTX
    1153             :     { idct64_col_c, iidtx64_c },         // V_DCT
    1154             :     { iidtx64_c, idct64_row_c },         // H_DCT
    1155             :     { ihalfright64_c, iidtx64_c },       // V_ADST
    1156             :     { iidtx64_c, ihalfright64_c },       // H_ADST
    1157             :     { ihalfright64_c, iidtx64_c },       // V_FLIPADST
    1158             :     { iidtx64_c, ihalfright64_c },       // H_FLIPADST
    1159             : #endif                                   // CONFIG_EXT_TX
    1160             :   };
    1161             : 
    1162             :   int i, j;
    1163             :   tran_low_t tmp[64][64];
    1164             :   tran_low_t out[64][64];
    1165             :   tran_low_t *outp = &out[0][0];
    1166             :   int outstride = 64;
    1167             : 
    1168             :   // inverse transform row vectors
    1169             :   for (i = 0; i < 64; ++i) {
    1170             :     IHT_64[tx_type].rows(input, out[i]);
    1171             :     for (j = 0; j < 64; ++j) out[i][j] = ROUND_POWER_OF_TWO(out[i][j], 1);
    1172             :     input += 64;
    1173             :   }
    1174             : 
    1175             :   // transpose
    1176             :   for (i = 0; i < 64; i++) {
    1177             :     for (j = 0; j < 64; j++) {
    1178             :       tmp[j][i] = out[i][j];
    1179             :     }
    1180             :   }
    1181             : 
    1182             :   // inverse transform column vectors
    1183             :   for (i = 0; i < 64; ++i) {
    1184             :     IHT_64[tx_type].cols(tmp[i], out[i]);
    1185             :   }
    1186             : 
    1187             : #if CONFIG_EXT_TX
    1188             :   maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 64, 64);
    1189             : #endif  // CONFIG_EXT_TX
    1190             : 
    1191             :   // Sum with the destination
    1192             :   for (i = 0; i < 64; ++i) {
    1193             :     for (j = 0; j < 64; ++j) {
    1194             :       int d = i * stride + j;
    1195             :       int s = j * outstride + i;
    1196             :       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    1197             :     }
    1198             :   }
    1199             : }
    1200             : #endif  // CONFIG_TX64X64
    1201             : 
    1202             : // idct
    1203           0 : void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
    1204             :                      int eob) {
    1205           0 :   if (eob > 1)
    1206           0 :     aom_idct4x4_16_add(input, dest, stride);
    1207             :   else
    1208           0 :     aom_idct4x4_1_add(input, dest, stride);
    1209           0 : }
    1210             : 
    1211           0 : void av1_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
    1212             :                      int eob) {
    1213           0 :   if (eob > 1)
    1214           0 :     aom_iwht4x4_16_add(input, dest, stride);
    1215             :   else
    1216           0 :     aom_iwht4x4_1_add(input, dest, stride);
    1217           0 : }
    1218             : 
    1219           0 : static void idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
    1220             :                         const INV_TXFM_PARAM *param) {
    1221             : // If dc is 1, then input[0] is the reconstructed value, do not need
    1222             : // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
    1223             : 
    1224             : // The calculation can be simplified if there are not many non-zero dct
    1225             : // coefficients. Use eobs to decide what to do.
    1226             : // TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
    1227             : // Combine that with code here.
    1228             : #if CONFIG_ADAPT_SCAN
    1229             :   const int16_t half = param->eob_threshold[0];
    1230             : #else
    1231           0 :   const int16_t half = 12;
    1232             : #endif
    1233             : 
    1234           0 :   const int eob = param->eob;
    1235           0 :   if (eob == 1)
    1236             :     // DC only DCT coefficient
    1237           0 :     aom_idct8x8_1_add(input, dest, stride);
    1238           0 :   else if (eob <= half)
    1239           0 :     aom_idct8x8_12_add(input, dest, stride);
    1240             :   else
    1241           0 :     aom_idct8x8_64_add(input, dest, stride);
    1242           0 : }
    1243             : 
    1244           0 : static void idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
    1245             :                           const INV_TXFM_PARAM *param) {
    1246             : // The calculation can be simplified if there are not many non-zero dct
    1247             : // coefficients. Use eobs to separate different cases.
    1248             : #if CONFIG_ADAPT_SCAN
    1249             :   const int16_t half = param->eob_threshold[0];
    1250             :   const int16_t quarter = param->eob_threshold[1];
    1251             : #else
    1252           0 :   const int16_t half = 38;
    1253           0 :   const int16_t quarter = 10;
    1254             : #endif
    1255             : 
    1256           0 :   const int eob = param->eob;
    1257           0 :   if (eob == 1) /* DC only DCT coefficient. */
    1258           0 :     aom_idct16x16_1_add(input, dest, stride);
    1259           0 :   else if (eob <= quarter)
    1260           0 :     aom_idct16x16_10_add(input, dest, stride);
    1261           0 :   else if (eob <= half)
    1262           0 :     aom_idct16x16_38_add(input, dest, stride);
    1263             :   else
    1264           0 :     aom_idct16x16_256_add(input, dest, stride);
    1265           0 : }
    1266             : 
    1267           0 : static void idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
    1268             :                           const INV_TXFM_PARAM *param) {
    1269             : #if CONFIG_ADAPT_SCAN
    1270             :   const int16_t half = param->eob_threshold[0];
    1271             :   const int16_t quarter = param->eob_threshold[1];
    1272             : #else
    1273           0 :   const int16_t half = 135;
    1274           0 :   const int16_t quarter = 34;
    1275             : #endif
    1276             : 
    1277           0 :   const int eob = param->eob;
    1278           0 :   if (eob == 1)
    1279           0 :     aom_idct32x32_1_add(input, dest, stride);
    1280           0 :   else if (eob <= quarter)
    1281             :     // non-zero coeff only in upper-left 8x8
    1282           0 :     aom_idct32x32_34_add(input, dest, stride);
    1283           0 :   else if (eob <= half)
    1284             :     // non-zero coeff only in upper-left 16x16
    1285           0 :     aom_idct32x32_135_add(input, dest, stride);
    1286             :   else
    1287           0 :     aom_idct32x32_1024_add(input, dest, stride);
    1288           0 : }
    1289             : 
    1290             : #if CONFIG_TX64X64
    1291             : static void idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
    1292             :                           const INV_TXFM_PARAM *param) {
    1293             :   (void)param;
    1294             :   av1_iht64x64_4096_add(input, dest, stride, DCT_DCT);
    1295             : }
    1296             : #endif  // CONFIG_TX64X64
    1297             : 
    1298             : #if CONFIG_CHROMA_2X2
    1299             : static void inv_txfm_add_2x2(const tran_low_t *input, uint8_t *dest, int stride,
    1300             :                              int eob, TX_TYPE tx_type, int lossless) {
    1301             :   tran_high_t a1 = input[0] >> UNIT_QUANT_SHIFT;
    1302             :   tran_high_t b1 = input[1] >> UNIT_QUANT_SHIFT;
    1303             :   tran_high_t c1 = input[2] >> UNIT_QUANT_SHIFT;
    1304             :   tran_high_t d1 = input[3] >> UNIT_QUANT_SHIFT;
    1305             : 
    1306             :   tran_high_t a2 = a1 + c1;
    1307             :   tran_high_t b2 = b1 + d1;
    1308             :   tran_high_t c2 = a1 - c1;
    1309             :   tran_high_t d2 = b1 - d1;
    1310             : 
    1311             :   (void)tx_type;
    1312             :   (void)lossless;
    1313             :   (void)eob;
    1314             : 
    1315             :   a1 = (a2 + b2) >> 2;
    1316             :   b1 = (a2 - b2) >> 2;
    1317             :   c1 = (c2 + d2) >> 2;
    1318             :   d1 = (c2 - d2) >> 2;
    1319             : 
    1320             :   dest[0] = clip_pixel_add(dest[0], WRAPLOW(a1));
    1321             :   dest[1] = clip_pixel_add(dest[1], WRAPLOW(b1));
    1322             :   dest[stride] = clip_pixel_add(dest[stride], WRAPLOW(c1));
    1323             :   dest[stride + 1] = clip_pixel_add(dest[stride + 1], WRAPLOW(d1));
    1324             : }
    1325             : #endif
    1326             : 
    1327           0 : static void inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
    1328             :                              int eob, TX_TYPE tx_type, int lossless) {
    1329           0 :   if (lossless) {
    1330           0 :     assert(tx_type == DCT_DCT);
    1331           0 :     av1_iwht4x4_add(input, dest, stride, eob);
    1332           0 :     return;
    1333             :   }
    1334             : 
    1335           0 :   switch (tx_type) {
    1336           0 :     case DCT_DCT: av1_idct4x4_add(input, dest, stride, eob); break;
    1337             :     case ADST_DCT:
    1338             :     case DCT_ADST:
    1339           0 :     case ADST_ADST: av1_iht4x4_16_add(input, dest, stride, tx_type); break;
    1340             : #if CONFIG_EXT_TX
    1341             :     case FLIPADST_DCT:
    1342             :     case DCT_FLIPADST:
    1343             :     case FLIPADST_FLIPADST:
    1344             :     case ADST_FLIPADST:
    1345           0 :     case FLIPADST_ADST: av1_iht4x4_16_add(input, dest, stride, tx_type); break;
    1346             :     case V_DCT:
    1347             :     case H_DCT:
    1348             :     case V_ADST:
    1349             :     case H_ADST:
    1350             :     case V_FLIPADST:
    1351             :     case H_FLIPADST:
    1352             :       // Use C version since DST only exists in C code
    1353           0 :       av1_iht4x4_16_add_c(input, dest, stride, tx_type);
    1354           0 :       break;
    1355           0 :     case IDTX: inv_idtx_add_c(input, dest, stride, 4, tx_type); break;
    1356             : #endif  // CONFIG_EXT_TX
    1357           0 :     default: assert(0); break;
    1358             :   }
    1359             : }
    1360             : 
    1361           0 : static void inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
    1362             :                              int eob, TX_TYPE tx_type) {
    1363             :   (void)eob;
    1364           0 :   av1_iht4x8_32_add(input, dest, stride, tx_type);
    1365           0 : }
    1366             : 
    1367           0 : static void inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride,
    1368             :                              int eob, TX_TYPE tx_type) {
    1369             :   (void)eob;
    1370           0 :   av1_iht8x4_32_add(input, dest, stride, tx_type);
    1371           0 : }
    1372             : 
    1373             : // These will be used by the masked-tx experiment in the future.
    1374             : #if CONFIG_RECT_TX && CONFIG_EXT_TX && CONFIG_RECT_TX_EXT
    1375             : static void inv_txfm_add_4x16(const tran_low_t *input, uint8_t *dest,
    1376             :                               int stride, int eob, TX_TYPE tx_type) {
    1377             :   (void)eob;
    1378             :   av1_iht4x16_64_add(input, dest, stride, tx_type);
    1379             : }
    1380             : 
    1381             : static void inv_txfm_add_16x4(const tran_low_t *input, uint8_t *dest,
    1382             :                               int stride, int eob, TX_TYPE tx_type) {
    1383             :   (void)eob;
    1384             :   av1_iht16x4_64_add(input, dest, stride, tx_type);
    1385             : }
    1386             : 
    1387             : static void inv_txfm_add_8x32(const tran_low_t *input, uint8_t *dest,
    1388             :                               int stride, int eob, TX_TYPE tx_type) {
    1389             :   (void)eob;
    1390             :   av1_iht8x32_256_add(input, dest, stride, tx_type);
    1391             : }
    1392             : 
    1393             : static void inv_txfm_add_32x8(const tran_low_t *input, uint8_t *dest,
    1394             :                               int stride, int eob, TX_TYPE tx_type) {
    1395             :   (void)eob;
    1396             :   av1_iht32x8_256_add(input, dest, stride, tx_type);
    1397             : }
    1398             : #endif
    1399             : 
    1400           0 : static void inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
    1401             :                               int stride, int eob, TX_TYPE tx_type) {
    1402             :   (void)eob;
    1403           0 :   av1_iht8x16_128_add(input, dest, stride, tx_type);
    1404           0 : }
    1405             : 
    1406           0 : static void inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
    1407             :                               int stride, int eob, TX_TYPE tx_type) {
    1408             :   (void)eob;
    1409           0 :   av1_iht16x8_128_add(input, dest, stride, tx_type);
    1410           0 : }
    1411             : 
    1412           0 : static void inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
    1413             :                                int stride, int eob, TX_TYPE tx_type) {
    1414             :   (void)eob;
    1415           0 :   av1_iht16x32_512_add(input, dest, stride, tx_type);
    1416           0 : }
    1417             : 
    1418           0 : static void inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
    1419             :                                int stride, int eob, TX_TYPE tx_type) {
    1420             :   (void)eob;
    1421           0 :   av1_iht32x16_512_add(input, dest, stride, tx_type);
    1422           0 : }
    1423             : 
    1424           0 : static void inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
    1425             :                              const INV_TXFM_PARAM *param) {
    1426           0 :   const TX_TYPE tx_type = param->tx_type;
    1427           0 :   switch (tx_type) {
    1428           0 :     case DCT_DCT: idct8x8_add(input, dest, stride, param); break;
    1429             :     case ADST_DCT:
    1430             :     case DCT_ADST:
    1431           0 :     case ADST_ADST: av1_iht8x8_64_add(input, dest, stride, tx_type); break;
    1432             : #if CONFIG_EXT_TX
    1433             :     case FLIPADST_DCT:
    1434             :     case DCT_FLIPADST:
    1435             :     case FLIPADST_FLIPADST:
    1436             :     case ADST_FLIPADST:
    1437           0 :     case FLIPADST_ADST: av1_iht8x8_64_add(input, dest, stride, tx_type); break;
    1438             :     case V_DCT:
    1439             :     case H_DCT:
    1440             :     case V_ADST:
    1441             :     case H_ADST:
    1442             :     case V_FLIPADST:
    1443             :     case H_FLIPADST:
    1444             :       // Use C version since DST only exists in C code
    1445           0 :       av1_iht8x8_64_add_c(input, dest, stride, tx_type);
    1446           0 :       break;
    1447           0 :     case IDTX: inv_idtx_add_c(input, dest, stride, 8, tx_type); break;
    1448             : #endif  // CONFIG_EXT_TX
    1449           0 :     default: assert(0); break;
    1450             :   }
    1451           0 : }
    1452             : 
    1453           0 : static void inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
    1454             :                                int stride, const INV_TXFM_PARAM *param) {
    1455           0 :   const TX_TYPE tx_type = param->tx_type;
    1456           0 :   switch (tx_type) {
    1457           0 :     case DCT_DCT: idct16x16_add(input, dest, stride, param); break;
    1458             :     case ADST_DCT:
    1459             :     case DCT_ADST:
    1460           0 :     case ADST_ADST: av1_iht16x16_256_add(input, dest, stride, tx_type); break;
    1461             : #if CONFIG_EXT_TX
    1462             :     case FLIPADST_DCT:
    1463             :     case DCT_FLIPADST:
    1464             :     case FLIPADST_FLIPADST:
    1465             :     case ADST_FLIPADST:
    1466             :     case FLIPADST_ADST:
    1467             :     case V_DCT:
    1468             :     case H_DCT:
    1469             :     case V_ADST:
    1470             :     case H_ADST:
    1471             :     case V_FLIPADST:
    1472           0 :     case H_FLIPADST: av1_iht16x16_256_add(input, dest, stride, tx_type); break;
    1473           0 :     case IDTX: inv_idtx_add_c(input, dest, stride, 16, tx_type); break;
    1474             : #endif  // CONFIG_EXT_TX
    1475           0 :     default: assert(0); break;
    1476             :   }
    1477           0 : }
    1478             : 
    1479           0 : static void inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
    1480             :                                int stride, const INV_TXFM_PARAM *param) {
    1481           0 :   const TX_TYPE tx_type = param->tx_type;
    1482           0 :   switch (tx_type) {
    1483           0 :     case DCT_DCT: idct32x32_add(input, dest, stride, param); break;
    1484             : #if CONFIG_EXT_TX
    1485             :     case ADST_DCT:
    1486             :     case DCT_ADST:
    1487             :     case ADST_ADST:
    1488             :     case FLIPADST_DCT:
    1489             :     case DCT_FLIPADST:
    1490             :     case FLIPADST_FLIPADST:
    1491             :     case ADST_FLIPADST:
    1492             :     case FLIPADST_ADST:
    1493             :     case V_DCT:
    1494             :     case H_DCT:
    1495             :     case V_ADST:
    1496             :     case H_ADST:
    1497             :     case V_FLIPADST:
    1498             :     case H_FLIPADST:
    1499           0 :       av1_iht32x32_1024_add_c(input, dest, stride, tx_type);
    1500           0 :       break;
    1501           0 :     case IDTX: inv_idtx_add_c(input, dest, stride, 32, tx_type); break;
    1502             : #endif  // CONFIG_EXT_TX
    1503           0 :     default: assert(0); break;
    1504             :   }
    1505           0 : }
    1506             : 
    1507             : #if CONFIG_TX64X64
    1508             : static void inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
    1509             :                                int stride, const INV_TXFM_PARAM *param) {
    1510             :   const TX_TYPE tx_type = param->tx_type;
    1511             :   switch (tx_type) {
    1512             :     case DCT_DCT: idct64x64_add(input, dest, stride, param); break;
    1513             : #if CONFIG_EXT_TX
    1514             :     case ADST_DCT:
    1515             :     case DCT_ADST:
    1516             :     case ADST_ADST:
    1517             :     case FLIPADST_DCT:
    1518             :     case DCT_FLIPADST:
    1519             :     case FLIPADST_FLIPADST:
    1520             :     case ADST_FLIPADST:
    1521             :     case FLIPADST_ADST:
    1522             :     case V_DCT:
    1523             :     case H_DCT:
    1524             :     case V_ADST:
    1525             :     case H_ADST:
    1526             :     case V_FLIPADST:
    1527             :     case H_FLIPADST:
    1528             :       av1_iht64x64_4096_add_c(input, dest, stride, tx_type);
    1529             :       break;
    1530             :     case IDTX: inv_idtx_add_c(input, dest, stride, 64, tx_type); break;
    1531             : #endif  // CONFIG_EXT_TX
    1532             :     default: assert(0); break;
    1533             :   }
    1534             : }
    1535             : #endif  // CONFIG_TX64X64
    1536             : 
    1537             : #if CONFIG_HIGHBITDEPTH
    1538           0 : void av1_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
    1539             :                                 int stride, int tx_type, int bd) {
    1540             :   static const highbd_transform_2d HIGH_IHT_4[] = {
    1541             :     { aom_highbd_idct4_c, aom_highbd_idct4_c },    // DCT_DCT
    1542             :     { aom_highbd_iadst4_c, aom_highbd_idct4_c },   // ADST_DCT
    1543             :     { aom_highbd_idct4_c, aom_highbd_iadst4_c },   // DCT_ADST
    1544             :     { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // ADST_ADST
    1545             : #if CONFIG_EXT_TX
    1546             :     { aom_highbd_iadst4_c, aom_highbd_idct4_c },   // FLIPADST_DCT
    1547             :     { aom_highbd_idct4_c, aom_highbd_iadst4_c },   // DCT_FLIPADST
    1548             :     { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // FLIPADST_FLIPADST
    1549             :     { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // ADST_FLIPADST
    1550             :     { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // FLIPADST_ADST
    1551             :     { highbd_iidtx4_c, highbd_iidtx4_c },          // IDTX
    1552             :     { aom_highbd_idct4_c, highbd_iidtx4_c },       // V_DCT
    1553             :     { highbd_iidtx4_c, aom_highbd_idct4_c },       // H_DCT
    1554             :     { aom_highbd_iadst4_c, highbd_iidtx4_c },      // V_ADST
    1555             :     { highbd_iidtx4_c, aom_highbd_iadst4_c },      // H_ADST
    1556             :     { aom_highbd_iadst4_c, highbd_iidtx4_c },      // V_FLIPADST
    1557             :     { highbd_iidtx4_c, aom_highbd_iadst4_c },      // H_FLIPADST
    1558             : #endif                                             // CONFIG_EXT_TX
    1559             :   };
    1560             : 
    1561           0 :   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
    1562             : 
    1563             :   int i, j;
    1564             :   tran_low_t tmp[4][4];
    1565             :   tran_low_t out[4][4];
    1566           0 :   tran_low_t *outp = &out[0][0];
    1567           0 :   int outstride = 4;
    1568             : 
    1569             :   // inverse transform row vectors
    1570           0 :   for (i = 0; i < 4; ++i) {
    1571           0 :     HIGH_IHT_4[tx_type].rows(input, out[i], bd);
    1572           0 :     input += 4;
    1573             :   }
    1574             : 
    1575             :   // transpose
    1576           0 :   for (i = 0; i < 4; i++) {
    1577           0 :     for (j = 0; j < 4; j++) {
    1578           0 :       tmp[j][i] = out[i][j];
    1579             :     }
    1580             :   }
    1581             : 
    1582             :   // inverse transform column vectors
    1583           0 :   for (i = 0; i < 4; ++i) {
    1584           0 :     HIGH_IHT_4[tx_type].cols(tmp[i], out[i], bd);
    1585             :   }
    1586             : 
    1587             : #if CONFIG_EXT_TX
    1588           0 :   maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
    1589             : #endif
    1590             : 
    1591             :   // Sum with the destination
    1592           0 :   for (i = 0; i < 4; ++i) {
    1593           0 :     for (j = 0; j < 4; ++j) {
    1594           0 :       int d = i * stride + j;
    1595           0 :       int s = j * outstride + i;
    1596           0 :       dest[d] =
    1597           0 :           highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4), bd);
    1598             :     }
    1599             :   }
    1600           0 : }
    1601             : 
    1602           0 : void av1_highbd_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest8,
    1603             :                                 int stride, int tx_type, int bd) {
    1604             :   static const highbd_transform_2d HIGH_IHT_4x8[] = {
    1605             :     { aom_highbd_idct8_c, aom_highbd_idct4_c },    // DCT_DCT
    1606             :     { aom_highbd_iadst8_c, aom_highbd_idct4_c },   // ADST_DCT
    1607             :     { aom_highbd_idct8_c, aom_highbd_iadst4_c },   // DCT_ADST
    1608             :     { aom_highbd_iadst8_c, aom_highbd_iadst4_c },  // ADST_ADST
    1609             : #if CONFIG_EXT_TX
    1610             :     { aom_highbd_iadst8_c, aom_highbd_idct4_c },   // FLIPADST_DCT
    1611             :     { aom_highbd_idct8_c, aom_highbd_iadst4_c },   // DCT_FLIPADST
    1612             :     { aom_highbd_iadst8_c, aom_highbd_iadst4_c },  // FLIPADST_FLIPADST
    1613             :     { aom_highbd_iadst8_c, aom_highbd_iadst4_c },  // ADST_FLIPADST
    1614             :     { aom_highbd_iadst8_c, aom_highbd_iadst4_c },  // FLIPADST_ADST
    1615             :     { highbd_iidtx8_c, highbd_iidtx4_c },          // IDTX
    1616             :     { aom_highbd_idct8_c, highbd_iidtx4_c },       // V_DCT
    1617             :     { highbd_iidtx8_c, aom_highbd_idct4_c },       // H_DCT
    1618             :     { aom_highbd_iadst8_c, highbd_iidtx4_c },      // V_ADST
    1619             :     { highbd_iidtx8_c, aom_highbd_iadst4_c },      // H_ADST
    1620             :     { aom_highbd_iadst8_c, highbd_iidtx4_c },      // V_FLIPADST
    1621             :     { highbd_iidtx8_c, aom_highbd_iadst4_c },      // H_FLIPADST
    1622             : #endif                                             // CONFIG_EXT_TX
    1623             :   };
    1624           0 :   const int n = 4;
    1625           0 :   const int n2 = 8;
    1626             : 
    1627           0 :   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
    1628             : 
    1629             :   int i, j;
    1630             :   tran_low_t out[4][8], tmp[4][8], outtmp[4];
    1631           0 :   tran_low_t *outp = &out[0][0];
    1632           0 :   int outstride = n2;
    1633             : 
    1634             :   // inverse transform row vectors, and transpose
    1635           0 :   for (i = 0; i < n2; ++i) {
    1636           0 :     HIGH_IHT_4x8[tx_type].rows(input, outtmp, bd);
    1637           0 :     for (j = 0; j < n; ++j) {
    1638           0 :       tmp[j][i] = HIGHBD_WRAPLOW(dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    1639             :     }
    1640           0 :     input += n;
    1641             :   }
    1642             : 
    1643             :   // inverse transform column vectors
    1644           0 :   for (i = 0; i < n; ++i) {
    1645           0 :     HIGH_IHT_4x8[tx_type].cols(tmp[i], out[i], bd);
    1646             :   }
    1647             : 
    1648             : #if CONFIG_EXT_TX
    1649           0 :   maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
    1650             : #endif  // CONFIG_EXT_TX
    1651             : 
    1652             :   // Sum with the destination
    1653           0 :   for (i = 0; i < n2; ++i) {
    1654           0 :     for (j = 0; j < n; ++j) {
    1655           0 :       int d = i * stride + j;
    1656           0 :       int s = j * outstride + i;
    1657           0 :       dest[d] =
    1658           0 :           highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
    1659             :     }
    1660             :   }
    1661           0 : }
    1662             : 
    1663           0 : void av1_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest8,
    1664             :                                 int stride, int tx_type, int bd) {
    1665             :   static const highbd_transform_2d HIGH_IHT_8x4[] = {
    1666             :     { aom_highbd_idct4_c, aom_highbd_idct8_c },    // DCT_DCT
    1667             :     { aom_highbd_iadst4_c, aom_highbd_idct8_c },   // ADST_DCT
    1668             :     { aom_highbd_idct4_c, aom_highbd_iadst8_c },   // DCT_ADST
    1669             :     { aom_highbd_iadst4_c, aom_highbd_iadst8_c },  // ADST_ADST
    1670             : #if CONFIG_EXT_TX
    1671             :     { aom_highbd_iadst4_c, aom_highbd_idct8_c },   // FLIPADST_DCT
    1672             :     { aom_highbd_idct4_c, aom_highbd_iadst8_c },   // DCT_FLIPADST
    1673             :     { aom_highbd_iadst4_c, aom_highbd_iadst8_c },  // FLIPADST_FLIPADST
    1674             :     { aom_highbd_iadst4_c, aom_highbd_iadst8_c },  // ADST_FLIPADST
    1675             :     { aom_highbd_iadst4_c, aom_highbd_iadst8_c },  // FLIPADST_ADST
    1676             :     { highbd_iidtx4_c, highbd_iidtx8_c },          // IDTX
    1677             :     { aom_highbd_idct4_c, highbd_iidtx8_c },       // V_DCT
    1678             :     { highbd_iidtx4_c, aom_highbd_idct8_c },       // H_DCT
    1679             :     { aom_highbd_iadst4_c, highbd_iidtx8_c },      // V_ADST
    1680             :     { highbd_iidtx4_c, aom_highbd_iadst8_c },      // H_ADST
    1681             :     { aom_highbd_iadst4_c, highbd_iidtx8_c },      // V_FLIPADST
    1682             :     { highbd_iidtx4_c, aom_highbd_iadst8_c },      // H_FLIPADST
    1683             : #endif                                             // CONFIG_EXT_TX
    1684             :   };
    1685           0 :   const int n = 4;
    1686           0 :   const int n2 = 8;
    1687             : 
    1688           0 :   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
    1689             : 
    1690             :   int i, j;
    1691             :   tran_low_t out[8][4], tmp[8][4], outtmp[8];
    1692           0 :   tran_low_t *outp = &out[0][0];
    1693           0 :   int outstride = n;
    1694             : 
    1695             :   // inverse transform row vectors, and transpose
    1696           0 :   for (i = 0; i < n; ++i) {
    1697           0 :     HIGH_IHT_8x4[tx_type].rows(input, outtmp, bd);
    1698           0 :     for (j = 0; j < n2; ++j) {
    1699           0 :       tmp[j][i] = HIGHBD_WRAPLOW(dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    1700             :     }
    1701           0 :     input += n2;
    1702             :   }
    1703             : 
    1704             :   // inverse transform column vectors
    1705           0 :   for (i = 0; i < n2; ++i) {
    1706           0 :     HIGH_IHT_8x4[tx_type].cols(tmp[i], out[i], bd);
    1707             :   }
    1708             : 
    1709             : #if CONFIG_EXT_TX
    1710           0 :   maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
    1711             : #endif  // CONFIG_EXT_TX
    1712             : 
    1713             :   // Sum with the destination
    1714           0 :   for (i = 0; i < n; ++i) {
    1715           0 :     for (j = 0; j < n2; ++j) {
    1716           0 :       int d = i * stride + j;
    1717           0 :       int s = j * outstride + i;
    1718           0 :       dest[d] =
    1719           0 :           highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
    1720             :     }
    1721             :   }
    1722           0 : }
    1723             : // Inverse 4x16 transform (4 wide, 16 tall, 64 coeffs), summed into dest8.
    1724           0 : void av1_highbd_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest8,
    1725             :                                  int stride, int tx_type, int bd) {
    1726             :   static const highbd_transform_2d HIGH_IHT_4x16[] = {  // indexed by tx_type
    1727             :     { aom_highbd_idct16_c, aom_highbd_idct4_c },    // DCT_DCT
    1728             :     { aom_highbd_iadst16_c, aom_highbd_idct4_c },   // ADST_DCT
    1729             :     { aom_highbd_idct16_c, aom_highbd_iadst4_c },   // DCT_ADST
    1730             :     { aom_highbd_iadst16_c, aom_highbd_iadst4_c },  // ADST_ADST
    1731             : #if CONFIG_EXT_TX
    1732             :     { aom_highbd_iadst16_c, aom_highbd_idct4_c },   // FLIPADST_DCT
    1733             :     { aom_highbd_idct16_c, aom_highbd_iadst4_c },   // DCT_FLIPADST
    1734             :     { aom_highbd_iadst16_c, aom_highbd_iadst4_c },  // FLIPADST_FLIPADST
    1735             :     { aom_highbd_iadst16_c, aom_highbd_iadst4_c },  // ADST_FLIPADST
    1736             :     { aom_highbd_iadst16_c, aom_highbd_iadst4_c },  // FLIPADST_ADST
    1737             :     { highbd_iidtx16_c, highbd_iidtx4_c },          // IDTX
    1738             :     { aom_highbd_idct16_c, highbd_iidtx4_c },       // V_DCT
    1739             :     { highbd_iidtx16_c, aom_highbd_idct4_c },       // H_DCT
    1740             :     { aom_highbd_iadst16_c, highbd_iidtx4_c },      // V_ADST
    1741             :     { highbd_iidtx16_c, aom_highbd_iadst4_c },      // H_ADST
    1742             :     { aom_highbd_iadst16_c, highbd_iidtx4_c },      // V_FLIPADST
    1743             :     { highbd_iidtx16_c, aom_highbd_iadst4_c },      // H_FLIPADST
    1744             : #endif                                              // CONFIG_EXT_TX
    1745             :   };
    1746           0 :   const int n = 4;    // coefficients per input row (block width)
    1747           0 :   const int n4 = 16;  // number of input rows (block height)
    1748             : 
    1749           0 :   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // pixels accessed as uint16_t
    1750             : 
    1751             :   int i, j;
    1752             :   tran_low_t out[4][16], tmp[4][16], outtmp[4];
    1753           0 :   tran_low_t *outp = &out[0][0];
    1754           0 :   int outstride = n4;
    1755             : 
    1756             :   // Row pass: 1-D inverse of each input row, stored transposed into tmp.
    1757           0 :   for (i = 0; i < n4; ++i) {
    1758           0 :     HIGH_IHT_4x16[tx_type].rows(input, outtmp, bd);
    1759           0 :     for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
    1760           0 :     input += n;
    1761             :   }
    1762             : 
    1763             :   // Column pass: 1-D inverse of each row of tmp (a block column) into out.
    1764           0 :   for (i = 0; i < n; ++i) HIGH_IHT_4x16[tx_type].cols(tmp[i], out[i], bd);
    1765             : 
    1766             : #if CONFIG_EXT_TX  // NOTE(review): helper may retarget dest/outp and strides
    1767           0 :   maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n4, n);
    1768             : #endif  // CONFIG_EXT_TX
    1769             : 
    1770             :   // Sum with the destination; out holds the block transposed.
    1771           0 :   for (i = 0; i < n4; ++i) {
    1772           0 :     for (j = 0; j < n; ++j) {
    1773           0 :       int d = i * stride + j;
    1774           0 :       int s = j * outstride + i;
    1775           0 :       dest[d] =
    1776           0 :           highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
    1777             :     }
    1778             :   }
    1779           0 : }
    1780             : // Inverse 16x4 transform (16 wide, 4 tall, 64 coeffs), summed into dest8.
    1781           0 : void av1_highbd_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest8,
    1782             :                                  int stride, int tx_type, int bd) {
    1783             :   static const highbd_transform_2d HIGH_IHT_16x4[] = {  // indexed by tx_type
    1784             :     { aom_highbd_idct4_c, aom_highbd_idct16_c },    // DCT_DCT
    1785             :     { aom_highbd_iadst4_c, aom_highbd_idct16_c },   // ADST_DCT
    1786             :     { aom_highbd_idct4_c, aom_highbd_iadst16_c },   // DCT_ADST
    1787             :     { aom_highbd_iadst4_c, aom_highbd_iadst16_c },  // ADST_ADST
    1788             : #if CONFIG_EXT_TX
    1789             :     { aom_highbd_iadst4_c, aom_highbd_idct16_c },   // FLIPADST_DCT
    1790             :     { aom_highbd_idct4_c, aom_highbd_iadst16_c },   // DCT_FLIPADST
    1791             :     { aom_highbd_iadst4_c, aom_highbd_iadst16_c },  // FLIPADST_FLIPADST
    1792             :     { aom_highbd_iadst4_c, aom_highbd_iadst16_c },  // ADST_FLIPADST
    1793             :     { aom_highbd_iadst4_c, aom_highbd_iadst16_c },  // FLIPADST_ADST
    1794             :     { highbd_iidtx4_c, highbd_iidtx16_c },          // IDTX
    1795             :     { aom_highbd_idct4_c, highbd_iidtx16_c },       // V_DCT
    1796             :     { highbd_iidtx4_c, aom_highbd_idct16_c },       // H_DCT
    1797             :     { aom_highbd_iadst4_c, highbd_iidtx16_c },      // V_ADST
    1798             :     { highbd_iidtx4_c, aom_highbd_iadst16_c },      // H_ADST
    1799             :     { aom_highbd_iadst4_c, highbd_iidtx16_c },      // V_FLIPADST
    1800             :     { highbd_iidtx4_c, aom_highbd_iadst16_c },      // H_FLIPADST
    1801             : #endif                                              // CONFIG_EXT_TX
    1802             :   };
    1803           0 :   const int n = 4;    // number of input rows (block height)
    1804           0 :   const int n4 = 16;  // coefficients per input row (block width)
    1805             : 
    1806           0 :   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // pixels accessed as uint16_t
    1807             : 
    1808             :   int i, j;
    1809             :   tran_low_t out[16][4], tmp[16][4], outtmp[16];
    1810           0 :   tran_low_t *outp = &out[0][0];
    1811           0 :   int outstride = n;
    1812             : 
    1813             :   // Row pass: 1-D inverse of each input row, stored transposed into tmp.
    1814           0 :   for (i = 0; i < n; ++i) {
    1815           0 :     HIGH_IHT_16x4[tx_type].rows(input, outtmp, bd);
    1816           0 :     for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
    1817           0 :     input += n4;
    1818             :   }
    1819             : 
    1820             :   // Column pass: 1-D inverse of each row of tmp (a block column) into out.
    1821           0 :   for (i = 0; i < n4; ++i) {
    1822           0 :     HIGH_IHT_16x4[tx_type].cols(tmp[i], out[i], bd);
    1823             :   }
    1824             : 
    1825             : #if CONFIG_EXT_TX  // NOTE(review): helper may retarget dest/outp and strides
    1826           0 :   maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n4);
    1827             : #endif  // CONFIG_EXT_TX
    1828             : 
    1829             :   // Sum with the destination; out holds the block transposed.
    1830           0 :   for (i = 0; i < n; ++i) {
    1831           0 :     for (j = 0; j < n4; ++j) {
    1832           0 :       int d = i * stride + j;
    1833           0 :       int s = j * outstride + i;
    1834           0 :       dest[d] =
    1835           0 :           highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
    1836             :     }
    1837             :   }
    1838           0 : }
    1839             : // Inverse 8x16 transform (8 wide, 16 tall, 128 coeffs), summed into dest8.
    1840           0 : void av1_highbd_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest8,
    1841             :                                   int stride, int tx_type, int bd) {
    1842             :   static const highbd_transform_2d HIGH_IHT_8x16[] = {  // indexed by tx_type
    1843             :     { aom_highbd_idct16_c, aom_highbd_idct8_c },    // DCT_DCT
    1844             :     { aom_highbd_iadst16_c, aom_highbd_idct8_c },   // ADST_DCT
    1845             :     { aom_highbd_idct16_c, aom_highbd_iadst8_c },   // DCT_ADST
    1846             :     { aom_highbd_iadst16_c, aom_highbd_iadst8_c },  // ADST_ADST
    1847             : #if CONFIG_EXT_TX
    1848             :     { aom_highbd_iadst16_c, aom_highbd_idct8_c },   // FLIPADST_DCT
    1849             :     { aom_highbd_idct16_c, aom_highbd_iadst8_c },   // DCT_FLIPADST
    1850             :     { aom_highbd_iadst16_c, aom_highbd_iadst8_c },  // FLIPADST_FLIPADST
    1851             :     { aom_highbd_iadst16_c, aom_highbd_iadst8_c },  // ADST_FLIPADST
    1852             :     { aom_highbd_iadst16_c, aom_highbd_iadst8_c },  // FLIPADST_ADST
    1853             :     { highbd_iidtx16_c, highbd_iidtx8_c },          // IDTX
    1854             :     { aom_highbd_idct16_c, highbd_iidtx8_c },       // V_DCT
    1855             :     { highbd_iidtx16_c, aom_highbd_idct8_c },       // H_DCT
    1856             :     { aom_highbd_iadst16_c, highbd_iidtx8_c },      // V_ADST
    1857             :     { highbd_iidtx16_c, aom_highbd_iadst8_c },      // H_ADST
    1858             :     { aom_highbd_iadst16_c, highbd_iidtx8_c },      // V_FLIPADST
    1859             :     { highbd_iidtx16_c, aom_highbd_iadst8_c },      // H_FLIPADST
    1860             : #endif                                              // CONFIG_EXT_TX
    1861             :   };
    1862           0 :   const int n = 8;    // coefficients per input row (block width)
    1863           0 :   const int n2 = 16;  // number of input rows (block height)
    1864             : 
    1865           0 :   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // pixels accessed as uint16_t
    1866             : 
    1867             :   int i, j;
    1868             :   tran_low_t out[8][16], tmp[8][16], outtmp[8];
    1869           0 :   tran_low_t *outp = &out[0][0];
    1870           0 :   int outstride = n2;
    1871             : 
    1872             :   // Row pass: 1-D inverse per input row, times Sqrt2, transposed into tmp.
    1873           0 :   for (i = 0; i < n2; ++i) {
    1874           0 :     HIGH_IHT_8x16[tx_type].rows(input, outtmp, bd);
    1875           0 :     for (j = 0; j < n; ++j)
    1876           0 :       tmp[j][i] = HIGHBD_WRAPLOW(dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    1877           0 :     input += n;
    1878             :   }
    1879             : 
    1880             :   // Column pass: 1-D inverse of each row of tmp (a block column) into out.
    1881           0 :   for (i = 0; i < n; ++i) {
    1882           0 :     HIGH_IHT_8x16[tx_type].cols(tmp[i], out[i], bd);
    1883             :   }
    1884             : 
    1885             : #if CONFIG_EXT_TX  // NOTE(review): helper may retarget dest/outp and strides
    1886           0 :   maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
    1887             : #endif  // CONFIG_EXT_TX
    1888             : 
    1889             :   // Sum with the destination; out holds the block transposed.
    1890           0 :   for (i = 0; i < n2; ++i) {
    1891           0 :     for (j = 0; j < n; ++j) {
    1892           0 :       int d = i * stride + j;
    1893           0 :       int s = j * outstride + i;
    1894           0 :       dest[d] =
    1895           0 :           highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    1896             :     }
    1897             :   }
    1898           0 : }
    1899             : // Inverse 16x8 transform (16 wide, 8 tall, 128 coeffs), summed into dest8.
    1900           0 : void av1_highbd_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest8,
    1901             :                                   int stride, int tx_type, int bd) {
    1902             :   static const highbd_transform_2d HIGH_IHT_16x8[] = {  // indexed by tx_type
    1903             :     { aom_highbd_idct8_c, aom_highbd_idct16_c },    // DCT_DCT
    1904             :     { aom_highbd_iadst8_c, aom_highbd_idct16_c },   // ADST_DCT
    1905             :     { aom_highbd_idct8_c, aom_highbd_iadst16_c },   // DCT_ADST
    1906             :     { aom_highbd_iadst8_c, aom_highbd_iadst16_c },  // ADST_ADST
    1907             : #if CONFIG_EXT_TX
    1908             :     { aom_highbd_iadst8_c, aom_highbd_idct16_c },   // FLIPADST_DCT
    1909             :     { aom_highbd_idct8_c, aom_highbd_iadst16_c },   // DCT_FLIPADST
    1910             :     { aom_highbd_iadst8_c, aom_highbd_iadst16_c },  // FLIPADST_FLIPADST
    1911             :     { aom_highbd_iadst8_c, aom_highbd_iadst16_c },  // ADST_FLIPADST
    1912             :     { aom_highbd_iadst8_c, aom_highbd_iadst16_c },  // FLIPADST_ADST
    1913             :     { highbd_iidtx8_c, highbd_iidtx16_c },          // IDTX
    1914             :     { aom_highbd_idct8_c, highbd_iidtx16_c },       // V_DCT
    1915             :     { highbd_iidtx8_c, aom_highbd_idct16_c },       // H_DCT
    1916             :     { aom_highbd_iadst8_c, highbd_iidtx16_c },      // V_ADST
    1917             :     { highbd_iidtx8_c, aom_highbd_iadst16_c },      // H_ADST
    1918             :     { aom_highbd_iadst8_c, highbd_iidtx16_c },      // V_FLIPADST
    1919             :     { highbd_iidtx8_c, aom_highbd_iadst16_c },      // H_FLIPADST
    1920             : #endif                                              // CONFIG_EXT_TX
    1921             :   };
    1922           0 :   const int n = 8;    // number of input rows (block height)
    1923           0 :   const int n2 = 16;  // coefficients per input row (block width)
    1924             : 
    1925           0 :   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // pixels accessed as uint16_t
    1926             : 
    1927             :   int i, j;
    1928             :   tran_low_t out[16][8], tmp[16][8], outtmp[16];
    1929           0 :   tran_low_t *outp = &out[0][0];
    1930           0 :   int outstride = n;
    1931             : 
    1932             :   // Row pass: 1-D inverse per input row, times Sqrt2, transposed into tmp.
    1933           0 :   for (i = 0; i < n; ++i) {
    1934           0 :     HIGH_IHT_16x8[tx_type].rows(input, outtmp, bd);
    1935           0 :     for (j = 0; j < n2; ++j)
    1936           0 :       tmp[j][i] = HIGHBD_WRAPLOW(dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    1937           0 :     input += n2;
    1938             :   }
    1939             : 
    1940             :   // Column pass: 1-D inverse of each row of tmp (a block column) into out.
    1941           0 :   for (i = 0; i < n2; ++i) {
    1942           0 :     HIGH_IHT_16x8[tx_type].cols(tmp[i], out[i], bd);
    1943             :   }
    1944             : 
    1945             : #if CONFIG_EXT_TX  // NOTE(review): helper may retarget dest/outp and strides
    1946           0 :   maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
    1947             : #endif  // CONFIG_EXT_TX
    1948             : 
    1949             :   // Sum with the destination; out holds the block transposed.
    1950           0 :   for (i = 0; i < n; ++i) {
    1951           0 :     for (j = 0; j < n2; ++j) {
    1952           0 :       int d = i * stride + j;
    1953           0 :       int s = j * outstride + i;
    1954           0 :       dest[d] =
    1955           0 :           highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    1956             :     }
    1957             :   }
    1958           0 : }
    1959             : // Inverse 8x32 transform (8 wide, 32 tall, 256 coeffs), summed into dest8.
    1960           0 : void av1_highbd_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest8,
    1961             :                                   int stride, int tx_type, int bd) {
    1962             :   static const highbd_transform_2d HIGH_IHT_8x32[] = {  // indexed by tx_type
    1963             :     { aom_highbd_idct32_c, aom_highbd_idct8_c },     // DCT_DCT
    1964             :     { highbd_ihalfright32_c, aom_highbd_idct8_c },   // ADST_DCT
    1965             :     { aom_highbd_idct32_c, aom_highbd_iadst8_c },    // DCT_ADST
    1966             :     { highbd_ihalfright32_c, aom_highbd_iadst8_c },  // ADST_ADST
    1967             : #if CONFIG_EXT_TX
    1968             :     { highbd_ihalfright32_c, aom_highbd_idct8_c },   // FLIPADST_DCT
    1969             :     { aom_highbd_idct32_c, aom_highbd_iadst8_c },    // DCT_FLIPADST
    1970             :     { highbd_ihalfright32_c, aom_highbd_iadst8_c },  // FLIPADST_FLIPADST
    1971             :     { highbd_ihalfright32_c, aom_highbd_iadst8_c },  // ADST_FLIPADST
    1972             :     { highbd_ihalfright32_c, aom_highbd_iadst8_c },  // FLIPADST_ADST
    1973             :     { highbd_iidtx32_c, highbd_iidtx8_c },           // IDTX
    1974             :     { aom_highbd_idct32_c, highbd_iidtx8_c },        // V_DCT
    1975             :     { highbd_iidtx32_c, aom_highbd_idct8_c },        // H_DCT
    1976             :     { highbd_ihalfright32_c, highbd_iidtx8_c },      // V_ADST
    1977             :     { highbd_iidtx32_c, aom_highbd_iadst8_c },       // H_ADST
    1978             :     { highbd_ihalfright32_c, highbd_iidtx8_c },      // V_FLIPADST
    1979             :     { highbd_iidtx32_c, aom_highbd_iadst8_c },       // H_FLIPADST
    1980             : #endif                                               // CONFIG_EXT_TX
    1981             :   };
    1982           0 :   const int n = 8;    // coefficients per input row (block width)
    1983           0 :   const int n4 = 32;  // number of input rows (block height)
    1984             : 
    1985           0 :   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // pixels accessed as uint16_t
    1986             : 
    1987             :   int i, j;
    1988             :   tran_low_t out[8][32], tmp[8][32], outtmp[8];
    1989           0 :   tran_low_t *outp = &out[0][0];
    1990           0 :   int outstride = n4;
    1991             : 
    1992             :   // Row pass: 1-D inverse of each input row, stored transposed into tmp.
    1993           0 :   for (i = 0; i < n4; ++i) {
    1994           0 :     HIGH_IHT_8x32[tx_type].rows(input, outtmp, bd);
    1995           0 :     for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
    1996           0 :     input += n;
    1997             :   }
    1998             : 
    1999             :   // Column pass: 1-D inverse of each row of tmp (a block column) into out.
    2000           0 :   for (i = 0; i < n; ++i) HIGH_IHT_8x32[tx_type].cols(tmp[i], out[i], bd);
    2001             : 
    2002             : #if CONFIG_EXT_TX  // NOTE(review): helper may retarget dest/outp and strides
    2003           0 :   maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n4, n);
    2004             : #endif  // CONFIG_EXT_TX
    2005             : 
    2006             :   // Sum with the destination; out holds the block transposed.
    2007           0 :   for (i = 0; i < n4; ++i) {
    2008           0 :     for (j = 0; j < n; ++j) {
    2009           0 :       int d = i * stride + j;
    2010           0 :       int s = j * outstride + i;
    2011           0 :       dest[d] =
    2012           0 :           highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    2013             :     }
    2014             :   }
    2015           0 : }
    2016             : // Inverse 32x8 transform (32 wide, 8 tall, 256 coeffs), summed into dest8.
    2017           0 : void av1_highbd_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest8,
    2018             :                                   int stride, int tx_type, int bd) {
    2019             :   static const highbd_transform_2d HIGH_IHT_32x8[] = {  // indexed by tx_type
    2020             :     { aom_highbd_idct8_c, aom_highbd_idct32_c },     // DCT_DCT
    2021             :     { aom_highbd_iadst8_c, aom_highbd_idct32_c },    // ADST_DCT
    2022             :     { aom_highbd_idct8_c, highbd_ihalfright32_c },   // DCT_ADST
    2023             :     { aom_highbd_iadst8_c, highbd_ihalfright32_c },  // ADST_ADST
    2024             : #if CONFIG_EXT_TX
    2025             :     { aom_highbd_iadst8_c, aom_highbd_idct32_c },    // FLIPADST_DCT
    2026             :     { aom_highbd_idct8_c, highbd_ihalfright32_c },   // DCT_FLIPADST
    2027             :     { aom_highbd_iadst8_c, highbd_ihalfright32_c },  // FLIPADST_FLIPADST
    2028             :     { aom_highbd_iadst8_c, highbd_ihalfright32_c },  // ADST_FLIPADST
    2029             :     { aom_highbd_iadst8_c, highbd_ihalfright32_c },  // FLIPADST_ADST
    2030             :     { highbd_iidtx8_c, highbd_iidtx32_c },           // IDTX
    2031             :     { aom_highbd_idct8_c, highbd_iidtx32_c },        // V_DCT
    2032             :     { highbd_iidtx8_c, aom_highbd_idct32_c },        // H_DCT
    2033             :     { aom_highbd_iadst8_c, highbd_iidtx32_c },       // V_ADST
    2034             :     { highbd_iidtx8_c, highbd_ihalfright32_c },      // H_ADST
    2035             :     { aom_highbd_iadst8_c, highbd_iidtx32_c },       // V_FLIPADST
    2036             :     { highbd_iidtx8_c, highbd_ihalfright32_c },      // H_FLIPADST
    2037             : #endif                                               // CONFIG_EXT_TX
    2038             :   };
    2039           0 :   const int n = 8;    // number of input rows (block height)
    2040           0 :   const int n4 = 32;  // coefficients per input row (block width)
    2041             : 
    2042           0 :   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // pixels accessed as uint16_t
    2043             : 
    2044             :   int i, j;
    2045             :   tran_low_t out[32][8], tmp[32][8], outtmp[32];
    2046           0 :   tran_low_t *outp = &out[0][0];
    2047           0 :   int outstride = n;
    2048             : 
    2049             :   // Row pass: 1-D inverse of each input row, stored transposed into tmp.
    2050           0 :   for (i = 0; i < n; ++i) {
    2051           0 :     HIGH_IHT_32x8[tx_type].rows(input, outtmp, bd);
    2052           0 :     for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
    2053           0 :     input += n4;
    2054             :   }
    2055             : 
    2056             :   // Column pass: 1-D inverse of each row of tmp (a block column) into out.
    2057           0 :   for (i = 0; i < n4; ++i) HIGH_IHT_32x8[tx_type].cols(tmp[i], out[i], bd);
    2058             : 
    2059             : #if CONFIG_EXT_TX  // NOTE(review): helper may retarget dest/outp and strides
    2060           0 :   maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n4);
    2061             : #endif  // CONFIG_EXT_TX
    2062             : 
    2063             :   // Sum with the destination; out holds the block transposed.
    2064           0 :   for (i = 0; i < n; ++i) {
    2065           0 :     for (j = 0; j < n4; ++j) {
    2066           0 :       int d = i * stride + j;
    2067           0 :       int s = j * outstride + i;
    2068           0 :       dest[d] =
    2069           0 :           highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    2070             :     }
    2071             :   }
    2072           0 : }
    2073             : // Inverse 16x32 transform (16 wide, 32 tall, 512 coeffs), summed into dest8.
    2074           0 : void av1_highbd_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest8,
    2075             :                                    int stride, int tx_type, int bd) {
    2076             :   static const highbd_transform_2d HIGH_IHT_16x32[] = {  // indexed by tx_type
    2077             :     { aom_highbd_idct32_c, aom_highbd_idct16_c },     // DCT_DCT
    2078             :     { highbd_ihalfright32_c, aom_highbd_idct16_c },   // ADST_DCT
    2079             :     { aom_highbd_idct32_c, aom_highbd_iadst16_c },    // DCT_ADST
    2080             :     { highbd_ihalfright32_c, aom_highbd_iadst16_c },  // ADST_ADST
    2081             : #if CONFIG_EXT_TX
    2082             :     { highbd_ihalfright32_c, aom_highbd_idct16_c },   // FLIPADST_DCT
    2083             :     { aom_highbd_idct32_c, aom_highbd_iadst16_c },    // DCT_FLIPADST
    2084             :     { highbd_ihalfright32_c, aom_highbd_iadst16_c },  // FLIPADST_FLIPADST
    2085             :     { highbd_ihalfright32_c, aom_highbd_iadst16_c },  // ADST_FLIPADST
    2086             :     { highbd_ihalfright32_c, aom_highbd_iadst16_c },  // FLIPADST_ADST
    2087             :     { highbd_iidtx32_c, highbd_iidtx16_c },           // IDTX
    2088             :     { aom_highbd_idct32_c, highbd_iidtx16_c },        // V_DCT
    2089             :     { highbd_iidtx32_c, aom_highbd_idct16_c },        // H_DCT
    2090             :     { highbd_ihalfright32_c, highbd_iidtx16_c },      // V_ADST
    2091             :     { highbd_iidtx32_c, aom_highbd_iadst16_c },       // H_ADST
    2092             :     { highbd_ihalfright32_c, highbd_iidtx16_c },      // V_FLIPADST
    2093             :     { highbd_iidtx32_c, aom_highbd_iadst16_c },       // H_FLIPADST
    2094             : #endif                                                // CONFIG_EXT_TX
    2095             :   };
    2096           0 :   const int n = 16;   // coefficients per input row (block width)
    2097           0 :   const int n2 = 32;  // number of input rows (block height)
    2098             : 
    2099           0 :   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // pixels accessed as uint16_t
    2100             : 
    2101             :   int i, j;
    2102             :   tran_low_t out[16][32], tmp[16][32], outtmp[16];
    2103           0 :   tran_low_t *outp = &out[0][0];
    2104           0 :   int outstride = n2;
    2105             : 
    2106             :   // Row pass: 1-D inverse per input row, times Sqrt2, transposed into tmp.
    2107           0 :   for (i = 0; i < n2; ++i) {
    2108           0 :     HIGH_IHT_16x32[tx_type].rows(input, outtmp, bd);
    2109           0 :     for (j = 0; j < n; ++j)
    2110           0 :       tmp[j][i] = HIGHBD_WRAPLOW(dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    2111           0 :     input += n;
    2112             :   }
    2113             : 
    2114             :   // Column pass: 1-D inverse of each row of tmp (a block column) into out.
    2115           0 :   for (i = 0; i < n; ++i) {
    2116           0 :     HIGH_IHT_16x32[tx_type].cols(tmp[i], out[i], bd);
    2117             :   }
    2118             : 
    2119             : #if CONFIG_EXT_TX  // NOTE(review): helper may retarget dest/outp and strides
    2120           0 :   maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
    2121             : #endif  // CONFIG_EXT_TX
    2122             : 
    2123             :   // Sum with the destination; out holds the block transposed.
    2124           0 :   for (i = 0; i < n2; ++i) {
    2125           0 :     for (j = 0; j < n; ++j) {
    2126           0 :       int d = i * stride + j;
    2127           0 :       int s = j * outstride + i;
    2128           0 :       dest[d] =
    2129           0 :           highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    2130             :     }
    2131             :   }
    2132           0 : }
    2133             : // Inverse 32x16 transform (32 wide, 16 tall, 512 coeffs), summed into dest8.
    2134           0 : void av1_highbd_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest8,
    2135             :                                    int stride, int tx_type, int bd) {
    2136             :   static const highbd_transform_2d HIGH_IHT_32x16[] = {  // indexed by tx_type
    2137             :     { aom_highbd_idct16_c, aom_highbd_idct32_c },     // DCT_DCT
    2138             :     { aom_highbd_iadst16_c, aom_highbd_idct32_c },    // ADST_DCT
    2139             :     { aom_highbd_idct16_c, highbd_ihalfright32_c },   // DCT_ADST
    2140             :     { aom_highbd_iadst16_c, highbd_ihalfright32_c },  // ADST_ADST
    2141             : #if CONFIG_EXT_TX
    2142             :     { aom_highbd_iadst16_c, aom_highbd_idct32_c },    // FLIPADST_DCT
    2143             :     { aom_highbd_idct16_c, highbd_ihalfright32_c },   // DCT_FLIPADST
    2144             :     { aom_highbd_iadst16_c, highbd_ihalfright32_c },  // FLIPADST_FLIPADST
    2145             :     { aom_highbd_iadst16_c, highbd_ihalfright32_c },  // ADST_FLIPADST
    2146             :     { aom_highbd_iadst16_c, highbd_ihalfright32_c },  // FLIPADST_ADST
    2147             :     { highbd_iidtx16_c, highbd_iidtx32_c },           // IDTX
    2148             :     { aom_highbd_idct16_c, highbd_iidtx32_c },        // V_DCT
    2149             :     { highbd_iidtx16_c, aom_highbd_idct32_c },        // H_DCT
    2150             :     { aom_highbd_iadst16_c, highbd_iidtx32_c },       // V_ADST
    2151             :     { highbd_iidtx16_c, highbd_ihalfright32_c },      // H_ADST
    2152             :     { aom_highbd_iadst16_c, highbd_iidtx32_c },       // V_FLIPADST
    2153             :     { highbd_iidtx16_c, highbd_ihalfright32_c },      // H_FLIPADST
    2154             : #endif                                                // CONFIG_EXT_TX
    2155             :   };
    2156           0 :   const int n = 16;   // number of input rows (block height)
    2157           0 :   const int n2 = 32;  // coefficients per input row (block width)
    2158             : 
    2159           0 :   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // pixels accessed as uint16_t
    2160             : 
    2161             :   int i, j;
    2162             :   tran_low_t out[32][16], tmp[32][16], outtmp[32];
    2163           0 :   tran_low_t *outp = &out[0][0];
    2164           0 :   int outstride = n;
    2165             : 
    2166             :   // Row pass: 1-D inverse per input row, times Sqrt2, transposed into tmp.
    2167           0 :   for (i = 0; i < n; ++i) {
    2168           0 :     HIGH_IHT_32x16[tx_type].rows(input, outtmp, bd);
    2169           0 :     for (j = 0; j < n2; ++j)
    2170           0 :       tmp[j][i] = HIGHBD_WRAPLOW(dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    2171           0 :     input += n2;
    2172             :   }
    2173             : 
    2174             :   // Column pass: 1-D inverse of each row of tmp (a block column) into out.
    2175           0 :   for (i = 0; i < n2; ++i) {
    2176           0 :     HIGH_IHT_32x16[tx_type].cols(tmp[i], out[i], bd);
    2177             :   }
    2178             : 
    2179             : #if CONFIG_EXT_TX  // NOTE(review): helper may retarget dest/outp and strides
    2180           0 :   maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
    2181             : #endif  // CONFIG_EXT_TX
    2182             : 
    2183             :   // Sum with the destination; out holds the block transposed.
    2184           0 :   for (i = 0; i < n; ++i) {
    2185           0 :     for (j = 0; j < n2; ++j) {
    2186           0 :       int d = i * stride + j;
    2187           0 :       int s = j * outstride + i;
    2188           0 :       dest[d] =
    2189           0 :           highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    2190             :     }
    2191             :   }
    2192           0 : }
    2193             : // Inverse 8x8 transform (64 coeffs), summed into the pixels at dest8.
    2194           0 : void av1_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
    2195             :                                 int stride, int tx_type, int bd) {
    2196             :   static const highbd_transform_2d HIGH_IHT_8[] = {  // indexed by tx_type
    2197             :     { aom_highbd_idct8_c, aom_highbd_idct8_c },    // DCT_DCT
    2198             :     { aom_highbd_iadst8_c, aom_highbd_idct8_c },   // ADST_DCT
    2199             :     { aom_highbd_idct8_c, aom_highbd_iadst8_c },   // DCT_ADST
    2200             :     { aom_highbd_iadst8_c, aom_highbd_iadst8_c },  // ADST_ADST
    2201             : #if CONFIG_EXT_TX
    2202             :     { aom_highbd_iadst8_c, aom_highbd_idct8_c },   // FLIPADST_DCT
    2203             :     { aom_highbd_idct8_c, aom_highbd_iadst8_c },   // DCT_FLIPADST
    2204             :     { aom_highbd_iadst8_c, aom_highbd_iadst8_c },  // FLIPADST_FLIPADST
    2205             :     { aom_highbd_iadst8_c, aom_highbd_iadst8_c },  // ADST_FLIPADST
    2206             :     { aom_highbd_iadst8_c, aom_highbd_iadst8_c },  // FLIPADST_ADST
    2207             :     { highbd_iidtx8_c, highbd_iidtx8_c },          // IDTX
    2208             :     { aom_highbd_idct8_c, highbd_iidtx8_c },       // V_DCT
    2209             :     { highbd_iidtx8_c, aom_highbd_idct8_c },       // H_DCT
    2210             :     { aom_highbd_iadst8_c, highbd_iidtx8_c },      // V_ADST
    2211             :     { highbd_iidtx8_c, aom_highbd_iadst8_c },      // H_ADST
    2212             :     { aom_highbd_iadst8_c, highbd_iidtx8_c },      // V_FLIPADST
    2213             :     { highbd_iidtx8_c, aom_highbd_iadst8_c },      // H_FLIPADST
    2214             : #endif                                             // CONFIG_EXT_TX
    2215             :   };
    2216             : 
    2217           0 :   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // pixels accessed as uint16_t
    2218             : 
    2219             :   int i, j;
    2220             :   tran_low_t tmp[8][8];
    2221             :   tran_low_t out[8][8];
    2222           0 :   tran_low_t *outp = &out[0][0];
    2223           0 :   int outstride = 8;
    2224             : 
    2225             :   // Row pass: 1-D inverse of each 8-coefficient input row into out.
    2226           0 :   for (i = 0; i < 8; ++i) {
    2227           0 :     HIGH_IHT_8[tx_type].rows(input, out[i], bd);
    2228           0 :     input += 8;
    2229             :   }
    2230             : 
    2231             :   // Transpose out into tmp so each block column becomes a row of tmp.
    2232           0 :   for (i = 0; i < 8; i++) {
    2233           0 :     for (j = 0; j < 8; j++) {
    2234           0 :       tmp[j][i] = out[i][j];
    2235             :     }
    2236             :   }
    2237             : 
    2238             :   // Column pass: 1-D inverse of each row of tmp, overwriting out.
    2239           0 :   for (i = 0; i < 8; ++i) {
    2240           0 :     HIGH_IHT_8[tx_type].cols(tmp[i], out[i], bd);
    2241             :   }
    2242             : 
    2243             : #if CONFIG_EXT_TX  // NOTE(review): helper may retarget dest/outp and strides
    2244           0 :   maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
    2245             : #endif  // CONFIG_EXT_TX
    2246             : 
    2247             :   // Sum with the destination; out holds the block transposed.
    2248           0 :   for (i = 0; i < 8; ++i) {
    2249           0 :     for (j = 0; j < 8; ++j) {
    2250           0 :       int d = i * stride + j;
    2251           0 :       int s = j * outstride + i;
    2252           0 :       dest[d] =
    2253           0 :           highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
    2254             :     }
    2255             :   }
    2256           0 : }
    2257             : // High-bitdepth 16x16 inverse hybrid transform: runs the 1-D kernels selected by tx_type over rows, then columns, and adds the rounded result into the 16-bit destination reached through dest8.
    2258           0 : void av1_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
    2259             :                                    int stride, int tx_type, int bd) {
    2260             :   static const highbd_transform_2d HIGH_IHT_16[] = {  // 1-D kernel pair per tx_type; indexed by tx_type below
    2261             :     { aom_highbd_idct16_c, aom_highbd_idct16_c },    // DCT_DCT
    2262             :     { aom_highbd_iadst16_c, aom_highbd_idct16_c },   // ADST_DCT
    2263             :     { aom_highbd_idct16_c, aom_highbd_iadst16_c },   // DCT_ADST
    2264             :     { aom_highbd_iadst16_c, aom_highbd_iadst16_c },  // ADST_ADST
    2265             : #if CONFIG_EXT_TX
    2266             :     { aom_highbd_iadst16_c, aom_highbd_idct16_c },   // FLIPADST_DCT
    2267             :     { aom_highbd_idct16_c, aom_highbd_iadst16_c },   // DCT_FLIPADST
    2268             :     { aom_highbd_iadst16_c, aom_highbd_iadst16_c },  // FLIPADST_FLIPADST
    2269             :     { aom_highbd_iadst16_c, aom_highbd_iadst16_c },  // ADST_FLIPADST
    2270             :     { aom_highbd_iadst16_c, aom_highbd_iadst16_c },  // FLIPADST_ADST
    2271             :     { highbd_iidtx16_c, highbd_iidtx16_c },          // IDTX
    2272             :     { aom_highbd_idct16_c, highbd_iidtx16_c },       // V_DCT
    2273             :     { highbd_iidtx16_c, aom_highbd_idct16_c },       // H_DCT
    2274             :     { aom_highbd_iadst16_c, highbd_iidtx16_c },      // V_ADST
    2275             :     { highbd_iidtx16_c, aom_highbd_iadst16_c },      // H_ADST
    2276             :     { aom_highbd_iadst16_c, highbd_iidtx16_c },      // V_FLIPADST
    2277             :     { highbd_iidtx16_c, aom_highbd_iadst16_c },      // H_FLIPADST
    2278             : #endif                                               // CONFIG_EXT_TX
    2279             :   };
    2280             : 
    2281           0 :   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // destination is addressed as uint16_t samples
    2282             : 
    2283             :   int i, j;
    2284             :   tran_low_t tmp[16][16];
    2285             :   tran_low_t out[16][16];
    2286           0 :   tran_low_t *outp = &out[0][0];  // flat view of out[][], indexed with outstride in the final loop
    2287           0 :   int outstride = 16;
    2288             : 
    2289             :   // inverse transform row vectors: each call consumes the next 16 input coefficients and fills out[i]
    2290           0 :   for (i = 0; i < 16; ++i) {
    2291           0 :     HIGH_IHT_16[tx_type].rows(input, out[i], bd);
    2292           0 :     input += 16;
    2293             :   }
    2294             : 
    2295             :   // transpose out[][] into tmp[][] so the column pass can read contiguous vectors
    2296           0 :   for (i = 0; i < 16; i++) {
    2297           0 :     for (j = 0; j < 16; j++) {
    2298           0 :       tmp[j][i] = out[i][j];
    2299             :     }
    2300             :   }
    2301             : 
    2302             :   // inverse transform column vectors; the results overwrite out[][]
    2303           0 :   for (i = 0; i < 16; ++i) {
    2304           0 :     HIGH_IHT_16[tx_type].cols(tmp[i], out[i], bd);
    2305             :   }
    2306             :   // dest, stride, outp and outstride are passed by address, so the helper below may change them (presumably to realise the FLIPADST variants; helper is defined elsewhere)
    2307             : #if CONFIG_EXT_TX
    2308           0 :   maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
    2309             : #endif
    2310             : 
    2311             :   // Sum with the destination: outp is read transposed (s = j * outstride + i), passed through ROUND_POWER_OF_TWO(., 6) and added to dest with highbd_clip_pixel_add
    2312           0 :   for (i = 0; i < 16; ++i) {
    2313           0 :     for (j = 0; j < 16; ++j) {
    2314           0 :       int d = i * stride + j;
    2315           0 :       int s = j * outstride + i;
    2316           0 :       dest[d] =
    2317           0 :           highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    2318             :     }
    2319             :   }
    2320           0 : }
    2321             : // High-bitdepth 32x32 inverse hybrid transform, compiled only with CONFIG_EXT_TX: row pass, transpose, column pass, then a rounded add into the 16-bit destination reached through dest8.
    2322             : #if CONFIG_EXT_TX
    2323           0 : static void highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
    2324             :                                        int stride, int tx_type, int bd) {
    2325             :   static const highbd_transform_2d HIGH_IHT_32[] = {  // 1-D kernel pair per tx_type; indexed by tx_type below
    2326             :     { aom_highbd_idct32_c, aom_highbd_idct32_c },      // DCT_DCT
    2327             :     { highbd_ihalfright32_c, aom_highbd_idct32_c },    // ADST_DCT
    2328             :     { aom_highbd_idct32_c, highbd_ihalfright32_c },    // DCT_ADST
    2329             :     { highbd_ihalfright32_c, highbd_ihalfright32_c },  // ADST_ADST
    2330             :     { highbd_ihalfright32_c, aom_highbd_idct32_c },    // FLIPADST_DCT
    2331             :     { aom_highbd_idct32_c, highbd_ihalfright32_c },    // DCT_FLIPADST
    2332             :     { highbd_ihalfright32_c, highbd_ihalfright32_c },  // FLIPADST_FLIPADST
    2333             :     { highbd_ihalfright32_c, highbd_ihalfright32_c },  // ADST_FLIPADST
    2334             :     { highbd_ihalfright32_c, highbd_ihalfright32_c },  // FLIPADST_ADST
    2335             :     { highbd_iidtx32_c, highbd_iidtx32_c },            // IDTX
    2336             :     { aom_highbd_idct32_c, highbd_iidtx32_c },         // V_DCT
    2337             :     { highbd_iidtx32_c, aom_highbd_idct32_c },         // H_DCT
    2338             :     { highbd_ihalfright32_c, highbd_iidtx32_c },       // V_ADST
    2339             :     { highbd_iidtx32_c, highbd_ihalfright32_c },       // H_ADST
    2340             :     { highbd_ihalfright32_c, highbd_iidtx32_c },       // V_FLIPADST
    2341             :     { highbd_iidtx32_c, highbd_ihalfright32_c },       // H_FLIPADST
    2342             :   };
    2343             : 
    2344           0 :   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // destination is addressed as uint16_t samples
    2345             : 
    2346             :   int i, j;
    2347             :   tran_low_t tmp[32][32];
    2348             :   tran_low_t out[32][32];
    2349           0 :   tran_low_t *outp = &out[0][0];  // flat view of out[][], indexed with outstride in the final loop
    2350           0 :   int outstride = 32;
    2351             : 
    2352             :   // inverse transform row vectors: each call consumes the next 32 input coefficients and fills out[i]
    2353           0 :   for (i = 0; i < 32; ++i) {
    2354           0 :     HIGH_IHT_32[tx_type].rows(input, out[i], bd);
    2355           0 :     input += 32;
    2356             :   }
    2357             : 
    2358             :   // transpose out[][] into tmp[][] so the column pass can read contiguous vectors
    2359           0 :   for (i = 0; i < 32; i++) {
    2360           0 :     for (j = 0; j < 32; j++) {
    2361           0 :       tmp[j][i] = out[i][j];
    2362             :     }
    2363             :   }
    2364             : 
    2365             :   // inverse transform column vectors; the results overwrite out[][]
    2366           0 :   for (i = 0; i < 32; ++i) {
    2367           0 :     HIGH_IHT_32[tx_type].cols(tmp[i], out[i], bd);
    2368             :   }
    2369             :   // dest, stride, outp and outstride are passed by address, so the helper below may change them (helper is defined elsewhere)
    2370           0 :   maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 32, 32);
    2371             : 
    2372             :   // Sum with the destination: outp is read transposed (s = j * outstride + i), passed through ROUND_POWER_OF_TWO(., 6) and added to dest with highbd_clip_pixel_add
    2373           0 :   for (i = 0; i < 32; ++i) {
    2374           0 :     for (j = 0; j < 32; ++j) {
    2375           0 :       int d = i * stride + j;
    2376           0 :       int s = j * outstride + i;
    2377           0 :       dest[d] =
    2378           0 :           highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    2379             :     }
    2380             :   }
    2381           0 : }
    2382             : #endif  // CONFIG_EXT_TX
    2383             : // High-bitdepth 64x64 inverse hybrid transform, compiled only with CONFIG_TX64X64; unlike the smaller sizes in this file it applies an extra ROUND_POWER_OF_TWO(., 1) after the row pass and uses a final shift of 5.
    2384             : #if CONFIG_TX64X64
    2385             : static void highbd_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest8,
    2386             :                                        int stride, int tx_type, int bd) {
    2387             :   static const highbd_transform_2d HIGH_IHT_64[] = {  // 1-D kernel pair per tx_type; indexed by tx_type below
    2388             :     { highbd_idct64_col_c, highbd_idct64_row_c },      // DCT_DCT
    2389             :     { highbd_ihalfright64_c, highbd_idct64_row_c },    // ADST_DCT
    2390             :     { highbd_idct64_col_c, highbd_ihalfright64_c },    // DCT_ADST
    2391             :     { highbd_ihalfright64_c, highbd_ihalfright64_c },  // ADST_ADST
    2392             : #if CONFIG_EXT_TX
    2393             :     { highbd_ihalfright64_c, highbd_idct64_row_c },    // FLIPADST_DCT
    2394             :     { highbd_idct64_col_c, highbd_ihalfright64_c },    // DCT_FLIPADST
    2395             :     { highbd_ihalfright64_c, highbd_ihalfright64_c },  // FLIPADST_FLIPADST
    2396             :     { highbd_ihalfright64_c, highbd_ihalfright64_c },  // ADST_FLIPADST
    2397             :     { highbd_ihalfright64_c, highbd_ihalfright64_c },  // FLIPADST_ADST
    2398             :     { highbd_iidtx64_c, highbd_iidtx64_c },            // IDTX
    2399             :     { highbd_idct64_col_c, highbd_iidtx64_c },         // V_DCT
    2400             :     { highbd_iidtx64_c, highbd_idct64_row_c },         // H_DCT
    2401             :     { highbd_ihalfright64_c, highbd_iidtx64_c },       // V_ADST
    2402             :     { highbd_iidtx64_c, highbd_ihalfright64_c },       // H_ADST
    2403             :     { highbd_ihalfright64_c, highbd_iidtx64_c },       // V_FLIPADST
    2404             :     { highbd_iidtx64_c, highbd_ihalfright64_c },       // H_FLIPADST
    2405             : #endif                                                 // CONFIG_EXT_TX
    2406             :   };
    2407             : 
    2408             :   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // destination is addressed as uint16_t samples
    2409             : 
    2410             :   int i, j;
    2411             :   tran_low_t tmp[64][64];
    2412             :   tran_low_t out[64][64];
    2413             :   tran_low_t *outp = &out[0][0];  // flat view of out[][], indexed with outstride in the final loop
    2414             :   int outstride = 64;
    2415             : 
    2416             :   // inverse transform row vectors, then scale every element of the row with ROUND_POWER_OF_TWO(., 1) before the column pass
    2417             :   for (i = 0; i < 64; ++i) {
    2418             :     HIGH_IHT_64[tx_type].rows(input, out[i], bd);
    2419             :     for (j = 0; j < 64; ++j) out[i][j] = ROUND_POWER_OF_TWO(out[i][j], 1);
    2420             :     input += 64;
    2421             :   }
    2422             : 
    2423             :   // transpose out[][] into tmp[][] so the column pass can read contiguous vectors
    2424             :   for (i = 0; i < 64; i++) {
    2425             :     for (j = 0; j < 64; j++) {
    2426             :       tmp[j][i] = out[i][j];
    2427             :     }
    2428             :   }
    2429             : 
    2430             :   // inverse transform column vectors; the results overwrite out[][]
    2431             :   for (i = 0; i < 64; ++i) {
    2432             :     HIGH_IHT_64[tx_type].cols(tmp[i], out[i], bd);
    2433             :   }
    2434             :   // dest, stride, outp and outstride are passed by address, so the helper below may change them (helper is defined elsewhere)
    2435             : #if CONFIG_EXT_TX
    2436             :   maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 64, 64);
    2437             : #endif  // CONFIG_EXT_TX
    2438             : 
    2439             :   // Sum with the destination: outp is read transposed (s = j * outstride + i), passed through ROUND_POWER_OF_TWO(., 5) and added to dest with highbd_clip_pixel_add
    2440             :   for (i = 0; i < 64; ++i) {
    2441             :     for (j = 0; j < 64; ++j) {
    2442             :       int d = i * stride + j;
    2443             :       int s = j * outstride + i;
    2444             :       dest[d] =
    2445             :           highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
    2446             :     }
    2447             :   }
    2448             : }
    2449             : #endif  // CONFIG_TX64X64
    2450             : 
    2451             : // idct: 4x4 high-bitdepth inverse DCT + add; eob > 1 selects aom_highbd_idct4x4_16_add, anything else selects aom_highbd_idct4x4_1_add
    2452           0 : void av1_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
    2453             :                             int eob, int bd) {
    2454           0 :   if (eob > 1)
    2455           0 :     aom_highbd_idct4x4_16_add(input, dest, stride, bd);
    2456             :   else
    2457           0 :     aom_highbd_idct4x4_1_add(input, dest, stride, bd);
    2458           0 : }
    2459             : // 4x4 high-bitdepth inverse WHT + add; eob > 1 selects aom_highbd_iwht4x4_16_add, anything else selects aom_highbd_iwht4x4_1_add
    2460           0 : void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
    2461             :                             int eob, int bd) {
    2462           0 :   if (eob > 1)
    2463           0 :     aom_highbd_iwht4x4_16_add(input, dest, stride, bd);
    2464             :   else
    2465           0 :     aom_highbd_iwht4x4_1_add(input, dest, stride, bd);
    2466           0 : }
    2467             : // 2x2 high-bitdepth inverse transform + add (CONFIG_CHROMA_2X2 only): a two-stage add/subtract butterfly over four coefficients; eob, tx_type and lossless are accepted but unused.
    2468             : #if CONFIG_CHROMA_2X2
    2469             : static void highbd_inv_txfm_add_2x2(const tran_low_t *input, uint8_t *dest,
    2470             :                                     int stride, int eob, int bd,
    2471             :                                     TX_TYPE tx_type, int lossless) {
    2472             :   tran_high_t a1 = input[0] >> UNIT_QUANT_SHIFT;  // the four coefficients are pre-scaled by UNIT_QUANT_SHIFT
    2473             :   tran_high_t b1 = input[1] >> UNIT_QUANT_SHIFT;
    2474             :   tran_high_t c1 = input[2] >> UNIT_QUANT_SHIFT;
    2475             :   tran_high_t d1 = input[3] >> UNIT_QUANT_SHIFT;
    2476             : 
    2477             :   tran_high_t a2 = a1 + c1;  // first stage: combine input[0] with input[2] and input[1] with input[3]
    2478             :   tran_high_t b2 = b1 + d1;
    2479             :   tran_high_t c2 = a1 - c1;
    2480             :   tran_high_t d2 = b1 - d1;
    2481             : 
    2482             :   uint16_t *dst = CONVERT_TO_SHORTPTR(dest);  // destination is addressed as uint16_t samples
    2483             : 
    2484             :   (void)tx_type;
    2485             :   (void)lossless;
    2486             :   (void)eob;
    2487             : 
    2488             :   a1 = (a2 + b2) >> 2;  // second stage, with a right shift by 2 on each output
    2489             :   b1 = (a2 - b2) >> 2;
    2490             :   c1 = (c2 + d2) >> 2;
    2491             :   d1 = (c2 - d2) >> 2;
    2492             : 
    2493             :   dst[0] = highbd_clip_pixel_add(dst[0], a1, bd);  // top row of the 2x2 block
    2494             :   dst[1] = highbd_clip_pixel_add(dst[1], b1, bd);
    2495             :   dst[stride] = highbd_clip_pixel_add(dst[stride], c1, bd);  // bottom row, one stride further
    2496             :   dst[stride + 1] = highbd_clip_pixel_add(dst[stride + 1], d1, bd);
    2497             : }
    2498             : #endif
    2499             : // 4x4 high-bitdepth inverse transform dispatcher: lossless blocks go to av1_highbd_iwht4x4_add, all others pick a kernel from tx_type.
    2500           0 : void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
    2501             :                                  int stride, int eob, int bd, TX_TYPE tx_type,
    2502             :                                  int lossless) {
    2503           0 :   if (lossless) {
    2504           0 :     assert(tx_type == DCT_DCT);  // the lossless path is only expected with DCT_DCT
    2505           0 :     av1_highbd_iwht4x4_add(input, dest, stride, eob, bd);
    2506           0 :     return;
    2507             :   }
    2508             : 
    2509           0 :   switch (tx_type) {
    2510             :     case DCT_DCT:
    2511             :     case ADST_DCT:
    2512             :     case DCT_ADST:
    2513             :     case ADST_ADST:
    2514             : #if CONFIG_EXT_TX
    2515             :     case FLIPADST_DCT:
    2516             :     case DCT_FLIPADST:
    2517             :     case FLIPADST_FLIPADST:
    2518             :     case ADST_FLIPADST:
    2519             :     case FLIPADST_ADST:
    2520             : #endif  // CONFIG_EXT_TX
    2521           0 :       av1_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
    2522             :                              bd);
    2523           0 :       break;
    2524             : #if CONFIG_EXT_TX
    2525             :     case V_DCT:
    2526             :     case H_DCT:
    2527             :     case V_ADST:
    2528             :     case H_ADST:
    2529             :     case V_FLIPADST:
    2530             :     case H_FLIPADST:
    2531             :       // Use C version since DST only exists in C code
    2532           0 :       av1_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
    2533           0 :       break;
    2534             :     case IDTX:
    2535           0 :       highbd_inv_idtx_add_c(input, dest, stride, 4, tx_type, bd);
    2536           0 :       break;
    2537             : #endif  // CONFIG_EXT_TX
    2538           0 :     default: assert(0); break;  // any other tx_type is treated as a programming error
    2539             :   }
    2540             : }
    2541             : // 4x8 high-bitdepth inverse transform + add: eob is ignored and the call is forwarded to av1_highbd_iht4x8_32_add_c.
    2542           0 : void av1_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
    2543             :                                  int stride, int eob, int bd, TX_TYPE tx_type) {
    2544             :   (void)eob;
    2545           0 :   av1_highbd_iht4x8_32_add_c(input, dest, stride, tx_type, bd);
    2546           0 : }
    2547             : // 8x4 high-bitdepth inverse transform + add: eob is ignored and the call is forwarded to av1_highbd_iht8x4_32_add_c.
    2548           0 : void av1_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
    2549             :                                  int stride, int eob, int bd, TX_TYPE tx_type) {
    2550             :   (void)eob;
    2551           0 :   av1_highbd_iht8x4_32_add_c(input, dest, stride, tx_type, bd);
    2552           0 : }
    2553             : // 4x16 high-bitdepth inverse transform + add: eob is ignored and the call is forwarded to av1_highbd_iht4x16_64_add_c.
    2554           0 : void av1_highbd_inv_txfm_add_4x16(const tran_low_t *input, uint8_t *dest,
    2555             :                                   int stride, int eob, int bd,
    2556             :                                   TX_TYPE tx_type) {
    2557             :   (void)eob;
    2558           0 :   av1_highbd_iht4x16_64_add_c(input, dest, stride, tx_type, bd);
    2559           0 : }
    2560             : // 16x4 high-bitdepth inverse transform + add: eob is ignored and the call is forwarded to av1_highbd_iht16x4_64_add_c.
    2561           0 : void av1_highbd_inv_txfm_add_16x4(const tran_low_t *input, uint8_t *dest,
    2562             :                                   int stride, int eob, int bd,
    2563             :                                   TX_TYPE tx_type) {
    2564             :   (void)eob;
    2565           0 :   av1_highbd_iht16x4_64_add_c(input, dest, stride, tx_type, bd);
    2566           0 : }
    2567             : // 8x16 high-bitdepth inverse transform + add: eob is ignored and the call is forwarded to av1_highbd_iht8x16_128_add_c.
    2568           0 : static void highbd_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
    2569             :                                      int stride, int eob, int bd,
    2570             :                                      TX_TYPE tx_type) {
    2571             :   (void)eob;
    2572           0 :   av1_highbd_iht8x16_128_add_c(input, dest, stride, tx_type, bd);
    2573           0 : }
    2574             : // 16x8 high-bitdepth inverse transform + add: eob is ignored and the call is forwarded to av1_highbd_iht16x8_128_add_c.
    2575           0 : static void highbd_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
    2576             :                                      int stride, int eob, int bd,
    2577             :                                      TX_TYPE tx_type) {
    2578             :   (void)eob;
    2579           0 :   av1_highbd_iht16x8_128_add_c(input, dest, stride, tx_type, bd);
    2580           0 : }
    2581             : // 8x32 high-bitdepth inverse transform + add: eob is ignored and the call is forwarded to av1_highbd_iht8x32_256_add_c.
    2582           0 : void av1_highbd_inv_txfm_add_8x32(const tran_low_t *input, uint8_t *dest,
    2583             :                                   int stride, int eob, int bd,
    2584             :                                   TX_TYPE tx_type) {
    2585             :   (void)eob;
    2586           0 :   av1_highbd_iht8x32_256_add_c(input, dest, stride, tx_type, bd);
    2587           0 : }
    2588             : // 32x8 high-bitdepth inverse transform + add: eob is ignored and the call is forwarded to av1_highbd_iht32x8_256_add_c.
    2589           0 : void av1_highbd_inv_txfm_add_32x8(const tran_low_t *input, uint8_t *dest,
    2590             :                                   int stride, int eob, int bd,
    2591             :                                   TX_TYPE tx_type) {
    2592             :   (void)eob;
    2593           0 :   av1_highbd_iht32x8_256_add_c(input, dest, stride, tx_type, bd);
    2594           0 : }
    2595             : // 16x32 high-bitdepth inverse transform + add: eob is ignored and the call is forwarded to av1_highbd_iht16x32_512_add_c.
    2596           0 : static void highbd_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
    2597             :                                       int stride, int eob, int bd,
    2598             :                                       TX_TYPE tx_type) {
    2599             :   (void)eob;
    2600           0 :   av1_highbd_iht16x32_512_add_c(input, dest, stride, tx_type, bd);
    2601           0 : }
    2602             : // 32x16 high-bitdepth inverse transform + add: eob is ignored and the call is forwarded to av1_highbd_iht32x16_512_add_c.
    2603           0 : static void highbd_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
    2604             :                                       int stride, int eob, int bd,
    2605             :                                       TX_TYPE tx_type) {
    2606             :   (void)eob;
    2607           0 :   av1_highbd_iht32x16_512_add_c(input, dest, stride, tx_type, bd);
    2608           0 : }
    2609             : // 8x8 high-bitdepth inverse transform dispatcher: eob is ignored; the kernel is chosen purely from tx_type.
    2610           0 : static void highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
    2611             :                                     int stride, int eob, int bd,
    2612             :                                     TX_TYPE tx_type) {
    2613             :   (void)eob;
    2614           0 :   switch (tx_type) {
    2615             :     case DCT_DCT:
    2616             :     case ADST_DCT:
    2617             :     case DCT_ADST:
    2618             :     case ADST_ADST:
    2619             : #if CONFIG_EXT_TX
    2620             :     case FLIPADST_DCT:
    2621             :     case DCT_FLIPADST:
    2622             :     case FLIPADST_FLIPADST:
    2623             :     case ADST_FLIPADST:
    2624             :     case FLIPADST_ADST:
    2625             : #endif  // CONFIG_EXT_TX
    2626           0 :       av1_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
    2627             :                              bd);
    2628           0 :       break;
    2629             : #if CONFIG_EXT_TX
    2630             :     case V_DCT:
    2631             :     case H_DCT:
    2632             :     case V_ADST:
    2633             :     case H_ADST:
    2634             :     case V_FLIPADST:
    2635             :     case H_FLIPADST:
    2636             :       // Use C version since DST only exists in C code
    2637           0 :       av1_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
    2638           0 :       break;
    2639             :     case IDTX:
    2640           0 :       highbd_inv_idtx_add_c(input, dest, stride, 8, tx_type, bd);
    2641           0 :       break;
    2642             : #endif  // CONFIG_EXT_TX
    2643           0 :     default: assert(0); break;  // any other tx_type is treated as a programming error
    2644             :   }
    2645           0 : }
    2646             : // 16x16 high-bitdepth inverse transform dispatcher: eob is ignored; the kernel is chosen purely from tx_type.
    2647           0 : static void highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
    2648             :                                       int stride, int eob, int bd,
    2649             :                                       TX_TYPE tx_type) {
    2650             :   (void)eob;
    2651           0 :   switch (tx_type) {
    2652             :     case DCT_DCT:
    2653             :     case ADST_DCT:
    2654             :     case DCT_ADST:
    2655             :     case ADST_ADST:
    2656             : #if CONFIG_EXT_TX
    2657             :     case FLIPADST_DCT:
    2658             :     case DCT_FLIPADST:
    2659             :     case FLIPADST_FLIPADST:
    2660             :     case ADST_FLIPADST:
    2661             :     case FLIPADST_ADST:
    2662             : #endif  // CONFIG_EXT_TX
    2663           0 :       av1_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
    2664             :                                tx_type, bd);
    2665           0 :       break;
    2666             : #if CONFIG_EXT_TX
    2667             :     case V_DCT:
    2668             :     case H_DCT:
    2669             :     case V_ADST:
    2670             :     case H_ADST:
    2671             :     case V_FLIPADST:
    2672             :     case H_FLIPADST:
    2673             :       // Use C version since DST only exists in C code
    2674           0 :       av1_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
    2675           0 :       break;
    2676             :     case IDTX:
    2677           0 :       highbd_inv_idtx_add_c(input, dest, stride, 16, tx_type, bd);
    2678           0 :       break;
    2679             : #endif  // CONFIG_EXT_TX
    2680           0 :     default: assert(0); break;  // any other tx_type is treated as a programming error
    2681             :   }
    2682           0 : }
    2683             : // 32x32 high-bitdepth inverse transform dispatcher: eob is ignored; only DCT_DCT is handled unless CONFIG_EXT_TX is enabled.
    2684           0 : static void highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
    2685             :                                       int stride, int eob, int bd,
    2686             :                                       TX_TYPE tx_type) {
    2687             :   (void)eob;
    2688           0 :   switch (tx_type) {
    2689             :     case DCT_DCT:
    2690           0 :       av1_inv_txfm2d_add_32x32(input, CONVERT_TO_SHORTPTR(dest), stride,
    2691             :                                DCT_DCT, bd);
    2692           0 :       break;
    2693             : #if CONFIG_EXT_TX
    2694             :     case ADST_DCT:
    2695             :     case DCT_ADST:
    2696             :     case ADST_ADST:
    2697             :     case FLIPADST_DCT:
    2698             :     case DCT_FLIPADST:
    2699             :     case FLIPADST_FLIPADST:
    2700             :     case ADST_FLIPADST:
    2701             :     case FLIPADST_ADST:
    2702             :     case V_DCT:
    2703             :     case H_DCT:
    2704             :     case V_ADST:
    2705             :     case H_ADST:
    2706             :     case V_FLIPADST:
    2707             :     case H_FLIPADST:
    2708           0 :       highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);  // every non-DCT_DCT, non-IDTX type uses the local C kernel
    2709           0 :       break;
    2710             :     case IDTX:
    2711           0 :       highbd_inv_idtx_add_c(input, dest, stride, 32, tx_type, bd);
    2712           0 :       break;
    2713             : #endif  // CONFIG_EXT_TX
    2714           0 :     default: assert(0); break;  // any other tx_type is treated as a programming error
    2715             :   }
    2716           0 : }
    2717             : // 64x64 high-bitdepth inverse transform dispatcher (CONFIG_TX64X64 only): eob is ignored; only DCT_DCT is handled unless CONFIG_EXT_TX is enabled.
    2718             : #if CONFIG_TX64X64
    2719             : static void highbd_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
    2720             :                                       int stride, int eob, int bd,
    2721             :                                       TX_TYPE tx_type) {
    2722             :   (void)eob;
    2723             :   switch (tx_type) {
    2724             :     case DCT_DCT:
    2725             :       av1_inv_txfm2d_add_64x64(input, CONVERT_TO_SHORTPTR(dest), stride,
    2726             :                                DCT_DCT, bd);
    2727             :       break;
    2728             : #if CONFIG_EXT_TX
    2729             :     case ADST_DCT:
    2730             :     case DCT_ADST:
    2731             :     case ADST_ADST:
    2732             :     case FLIPADST_DCT:
    2733             :     case DCT_FLIPADST:
    2734             :     case FLIPADST_FLIPADST:
    2735             :     case ADST_FLIPADST:
    2736             :     case FLIPADST_ADST:
    2737             :     case V_DCT:
    2738             :     case H_DCT:
    2739             :     case V_ADST:
    2740             :     case H_ADST:
    2741             :     case V_FLIPADST:
    2742             :     case H_FLIPADST:
    2743             :       highbd_iht64x64_4096_add_c(input, dest, stride, tx_type, bd);  // every non-DCT_DCT, non-IDTX type uses the local C kernel
    2744             :       break;
    2745             :     case IDTX:
    2746             :       highbd_inv_idtx_add_c(input, dest, stride, 64, tx_type, bd);
    2747             :       break;
    2748             : #endif  // CONFIG_EXT_TX
    2749             :     default: assert(0); break;  // any other tx_type is treated as a programming error
    2750             :   }
    2751             : }
    2752             : #endif  // CONFIG_TX64X64
    2753             : #endif  // CONFIG_HIGHBITDEPTH
    2754             : // Inverse transform + add entry point: unpacks param and dispatches on param->tx_size to the matching inv_txfm_add_* helper.
    2755           0 : void av1_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
    2756             :                       INV_TXFM_PARAM *param) {
    2757           0 :   const TX_TYPE tx_type = param->tx_type;
    2758           0 :   const TX_SIZE tx_size = param->tx_size;
    2759           0 :   const int eob = param->eob;
    2760           0 :   const int lossless = param->lossless;
    2761             : 
    2762           0 :   switch (tx_size) {  // 8x8 and larger square sizes receive the whole param struct; the other sizes get eob/tx_type explicitly
    2763             : #if CONFIG_TX64X64
    2764             :     case TX_64X64: inv_txfm_add_64x64(input, dest, stride, param); break;
    2765             : #endif  // CONFIG_TX64X64
    2766           0 :     case TX_32X32: inv_txfm_add_32x32(input, dest, stride, param); break;
    2767           0 :     case TX_16X16: inv_txfm_add_16x16(input, dest, stride, param); break;
    2768           0 :     case TX_8X8: inv_txfm_add_8x8(input, dest, stride, param); break;
    2769           0 :     case TX_4X8: inv_txfm_add_4x8(input, dest, stride, eob, tx_type); break;
    2770           0 :     case TX_8X4: inv_txfm_add_8x4(input, dest, stride, eob, tx_type); break;
    2771           0 :     case TX_8X16: inv_txfm_add_8x16(input, dest, stride, eob, tx_type); break;
    2772           0 :     case TX_16X8: inv_txfm_add_16x8(input, dest, stride, eob, tx_type); break;
    2773           0 :     case TX_16X32: inv_txfm_add_16x32(input, dest, stride, eob, tx_type); break;
    2774           0 :     case TX_32X16: inv_txfm_add_32x16(input, dest, stride, eob, tx_type); break;
    2775             :     case TX_4X4:
    2776             :       // this is like av1_short_idct4x4 but has a special case around eob<=1
    2777             :       // which is significant (not just an optimization) for the lossless
    2778             :       // case.
    2779           0 :       inv_txfm_add_4x4(input, dest, stride, eob, tx_type, lossless);
    2780           0 :       break;
    2781             : #if CONFIG_CHROMA_2X2
    2782             :     case TX_2X2:
    2783             :       inv_txfm_add_2x2(input, dest, stride, eob, tx_type, lossless);
    2784             :       break;
    2785             : #endif
    2786             : #if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
    2787             :     case TX_32X8: inv_txfm_add_32x8(input, dest, stride, eob, tx_type); break;
    2788             :     case TX_8X32: inv_txfm_add_8x32(input, dest, stride, eob, tx_type); break;
    2789             :     case TX_16X4: inv_txfm_add_16x4(input, dest, stride, eob, tx_type); break;
    2790             :     case TX_4X16: inv_txfm_add_4x16(input, dest, stride, eob, tx_type); break;
    2791             : #endif
    2792           0 :     default: assert(0 && "Invalid transform size"); break;
    2793             :   }
    2794           0 : }
    2795             : // Fills *inv from the arguments and from xd: the lossless flag is looked up by the segment id of xd->mi[0], and bd / eob_threshold are set only when their config flags are enabled.
    2796           0 : static void init_inv_txfm_param(const MACROBLOCKD *xd, TX_SIZE tx_size,
    2797             :                                 TX_TYPE tx_type, int eob, INV_TXFM_PARAM *inv) {
    2798           0 :   inv->tx_type = tx_type;
    2799           0 :   inv->tx_size = tx_size;
    2800           0 :   inv->eob = eob;
    2801           0 :   inv->lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];  // per-segment lossless flag
    2802             : #if CONFIG_HIGHBITDEPTH
    2803           0 :   inv->bd = xd->bd;
    2804             : #endif
    2805             : #if CONFIG_ADAPT_SCAN
    2806             :   inv->eob_threshold =
    2807             :       (const int16_t *)&xd->eob_threshold_md[tx_size][tx_type][0];  // points into xd's table for this tx_size/tx_type
    2808             : #endif
    2809           0 : }
    2810             : // Inverse-transforms dqcoeff and adds it into dst; returns without touching dst when eob is 0, and selects the high-bitdepth or regular path from xd->cur_buf->flags.
    2811           0 : void av1_inverse_transform_block(const MACROBLOCKD *xd,
    2812             :                                  const tran_low_t *dqcoeff, TX_TYPE tx_type,
    2813             :                                  TX_SIZE tx_size, uint8_t *dst, int stride,
    2814             :                                  int eob) {
    2815           0 :   if (!eob) return;  // nothing to add for an all-zero block
    2816             : #if CONFIG_PVQ
    2817             :   const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
    2818             :   const int txb_width = block_size_wide[tx_bsize];
    2819             :   const int txb_height = block_size_high[tx_bsize];
    2820             :   int r, c;
    2821             : #if CONFIG_HIGHBITDEPTH
    2822             :   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    2823             :     for (r = 0; r < txb_height; r++)  // with PVQ the destination block is zeroed before the transform result is added
    2824             :       for (c = 0; c < txb_width; c++)
    2825             :         CONVERT_TO_SHORTPTR(dst)[r * stride + c] = 0;
    2826             :   } else {
    2827             : #endif  // CONFIG_HIGHBITDEPTH
    2828             :     for (r = 0; r < txb_height; r++)
    2829             :       for (c = 0; c < txb_width; c++) dst[r * stride + c] = 0;
    2830             : #if CONFIG_HIGHBITDEPTH
    2831             :   }
    2832             : #endif  // CONFIG_HIGHBITDEPTH
    2833             : #endif  // CONFIG_PVQ
    2834             :   INV_TXFM_PARAM inv_txfm_param;
    2835           0 :   init_inv_txfm_param(xd, tx_size, tx_type, eob, &inv_txfm_param);
    2836             : 
    2837             : #if CONFIG_HIGHBITDEPTH
    2838           0 :   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    2839           0 :     av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &inv_txfm_param);
    2840             :   } else {
    2841             : #endif  // CONFIG_HIGHBITDEPTH
    2842           0 :     av1_inv_txfm_add(dqcoeff, dst, stride, &inv_txfm_param);
    2843             : #if CONFIG_HIGHBITDEPTH
    2844             :   }
    2845             : #endif  // CONFIG_HIGHBITDEPTH
    2846             : }
    2847             : 
    // Convenience wrapper around av1_inverse_transform_block(): derives the
    // coefficient pointer, transform size/type and destination pointer for one
    // block of a plane from xd, then runs the inverse transform on it.
    //
    //   plane    - index into xd->plane.
    //   block    - block index used for BLOCK_OFFSET and get_tx_type.
    //   blk_row, blk_col - position of the block inside the plane.
    //              NOTE(review): the position is shifted left by
    //              tx_size_wide_log2[0], so these look like units of the
    //              smallest transform width rather than pixels - confirm.
    //   eob      - end-of-block value forwarded unchanged.
    2848           0 : void av1_inverse_transform_block_facade(MACROBLOCKD *xd, int plane, int block,
    2849             :                                         int blk_row, int blk_col, int eob) {
    2850           0 :   struct macroblockd_plane *const pd = &xd->plane[plane];
    2851           0 :   tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    2852           0 :   const PLANE_TYPE plane_type = get_plane_type(plane);
    2853           0 :   const TX_SIZE tx_size = get_tx_size(plane, xd);
    2854           0 :   const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
    2855           0 :   const int dst_stride = pd->dst.stride;
                         // Convert the (row, col) block position into an offset in dst.buf.
    2856           0 :   uint8_t *dst =
    2857           0 :       &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
    2858           0 :   av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, dst, dst_stride,
    2859             :                               eob);
    2860           0 : }
    2861             : 
    2862             : #if CONFIG_HIGHBITDEPTH
    // High-bit-depth inverse transform dispatcher: reads the transform type,
    // size, eob, bit depth and lossless flag from *inv_txfm_param and calls the
    // routine matching the transform size.
    //
    //   input  - coefficients handed to the size-specific routine.
    //   dest   - destination buffer, passed through unchanged.
    //   stride - row stride of dest, passed through unchanged.
    //
    // Only the 4x4 (and, with CONFIG_CHROMA_2X2, the 2x2) case receives the
    // lossless flag. An unhandled size trips the assert in the default case and
    // is otherwise ignored in builds where assert is compiled out.
    2863           0 : void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
    2864             :                              INV_TXFM_PARAM *inv_txfm_param) {
    2865           0 :   const TX_TYPE tx_type = inv_txfm_param->tx_type;
    2866           0 :   const TX_SIZE tx_size = inv_txfm_param->tx_size;
    2867           0 :   const int eob = inv_txfm_param->eob;
    2868           0 :   const int bd = inv_txfm_param->bd;
    2869           0 :   const int lossless = inv_txfm_param->lossless;
    2870             : 
    2871           0 :   switch (tx_size) {
    2872             : #if CONFIG_TX64X64
    2873             :     case TX_64X64:
    2874             :       highbd_inv_txfm_add_64x64(input, dest, stride, eob, bd, tx_type);
    2875             :       break;
    2876             : #endif  // CONFIG_TX64X64
    2877             :     case TX_32X32:
    2878           0 :       highbd_inv_txfm_add_32x32(input, dest, stride, eob, bd, tx_type);
    2879           0 :       break;
    2880             :     case TX_16X16:
    2881           0 :       highbd_inv_txfm_add_16x16(input, dest, stride, eob, bd, tx_type);
    2882           0 :       break;
    2883             :     case TX_8X8:
    2884           0 :       highbd_inv_txfm_add_8x8(input, dest, stride, eob, bd, tx_type);
    2885           0 :       break;
    2886             :     case TX_4X8:
    2887           0 :       av1_highbd_inv_txfm_add_4x8(input, dest, stride, eob, bd, tx_type);
    2888           0 :       break;
    2889             :     case TX_8X4:
    2890           0 :       av1_highbd_inv_txfm_add_8x4(input, dest, stride, eob, bd, tx_type);
    2891           0 :       break;
    2892             :     case TX_8X16:
    2893           0 :       highbd_inv_txfm_add_8x16(input, dest, stride, eob, bd, tx_type);
    2894           0 :       break;
    2895             :     case TX_16X8:
    2896           0 :       highbd_inv_txfm_add_16x8(input, dest, stride, eob, bd, tx_type);
    2897           0 :       break;
    2898             :     case TX_16X32:
    2899           0 :       highbd_inv_txfm_add_16x32(input, dest, stride, eob, bd, tx_type);
    2900           0 :       break;
    2901             :     case TX_32X16:
    2902           0 :       highbd_inv_txfm_add_32x16(input, dest, stride, eob, bd, tx_type);
    2903           0 :       break;
    2904             :     case TX_4X4:
    2905             :       // this is like av1_short_idct4x4 but has a special case around eob<=1
    2906             :       // which is significant (not just an optimization) for the lossless
    2907             :       // case.
    2908           0 :       av1_highbd_inv_txfm_add_4x4(input, dest, stride, eob, bd, tx_type,
    2909             :                                   lossless);
    2910           0 :       break;
    2911             : #if CONFIG_CHROMA_2X2
    2912             :     case TX_2X2:
    2913             :       highbd_inv_txfm_add_2x2(input, dest, stride, eob, bd, tx_type, lossless);
    2914             :       break;
    2915             : #endif
    2916           0 :     default: assert(0 && "Invalid transform size"); break;
    2917             :   }
    2918           0 : }
    2919             : #endif  // CONFIG_HIGHBITDEPTH
    2920             : 
    2921             : #if CONFIG_DPCM_INTRA
    // 4-point 1-D inverse transform for DPCM intra: transforms `input` into four
    // values and adds them onto four destination samples spaced `stride` apart.
    //
    //   input   - 4 coefficients.
    //   stride  - distance between consecutive destination samples.
    //   tx_type - index into the 1-D transform table; must be < TX_TYPES_1D.
    //   dest    - first destination sample; updated in place with clipping.
    2922             : void av1_dpcm_inv_txfm_add_4_c(const tran_low_t *input, int stride,
    2923             :                                TX_TYPE_1D tx_type, uint8_t *dest) {
    2924             :   assert(tx_type < TX_TYPES_1D);
                         // Entries 1 and 2 deliberately share the same ADST routine.
                         // NOTE(review): presumably the table order is DCT, ADST, FLIPADST,
                         // IDTX - confirm against the TX_TYPE_1D enum.
    2925             :   static const transform_1d IHT[] = { aom_idct4_c, aom_iadst4_c, aom_iadst4_c,
    2926             :                                       iidtx4_c };
    2927             :   const transform_1d inv_tx = IHT[tx_type];
    2928             :   tran_low_t out[4];
    2929             :   inv_tx(input, out);
    2930             :   for (int i = 0; i < 4; ++i) {
                           // Each output is multiplied by Sqrt2 and passed through
                           // dct_const_round_shift before the final rounding by 4 bits.
    2931             :     out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
    2932             :     dest[i * stride] =
    2933             :         clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 4));
    2934             :   }
    2935             : }
    2936             : 
    // 8-point 1-D inverse transform for DPCM intra: transforms `input` into
    // eight values and adds them onto eight destination samples spaced `stride`
    // apart.
    //
    //   input   - 8 coefficients.
    //   stride  - distance between consecutive destination samples.
    //   tx_type - index into the 1-D transform table; must be < TX_TYPES_1D.
    //   dest    - first destination sample; updated in place with clipping.
    2937             : void av1_dpcm_inv_txfm_add_8_c(const tran_low_t *input, int stride,
    2938             :                                TX_TYPE_1D tx_type, uint8_t *dest) {
    2939             :   assert(tx_type < TX_TYPES_1D);
                         // Entries 1 and 2 deliberately share the same ADST routine.
    2940             :   static const transform_1d IHT[] = { aom_idct8_c, aom_iadst8_c, aom_iadst8_c,
    2941             :                                       iidtx8_c };
    2942             :   const transform_1d inv_tx = IHT[tx_type];
    2943             :   tran_low_t out[8];
    2944             :   inv_tx(input, out);
    2945             :   for (int i = 0; i < 8; ++i) {
                           // No extra Sqrt2 scaling at this length; only the rounding by 4 bits.
    2946             :     dest[i * stride] =
    2947             :         clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 4));
    2948             :   }
    2949             : }
    2950             : 
    // 16-point 1-D inverse transform for DPCM intra: transforms `input` into
    // sixteen values and adds them onto sixteen destination samples spaced
    // `stride` apart.
    //
    //   input   - 16 coefficients.
    //   stride  - distance between consecutive destination samples.
    //   tx_type - index into the 1-D transform table; must be < TX_TYPES_1D.
    //   dest    - first destination sample; updated in place with clipping.
    2951             : void av1_dpcm_inv_txfm_add_16_c(const tran_low_t *input, int stride,
    2952             :                                 TX_TYPE_1D tx_type, uint8_t *dest) {
    2953             :   assert(tx_type < TX_TYPES_1D);
                         // Entries 1 and 2 deliberately share the same ADST routine.
    2954             :   static const transform_1d IHT[] = { aom_idct16_c, aom_iadst16_c,
    2955             :                                       aom_iadst16_c, iidtx16_c };
    2956             :   const transform_1d inv_tx = IHT[tx_type];
    2957             :   tran_low_t out[16];
    2958             :   inv_tx(input, out);
    2959             :   for (int i = 0; i < 16; ++i) {
                           // Each output is multiplied by Sqrt2 and passed through
                           // dct_const_round_shift; the final rounding here is by 5 bits.
    2960             :     out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
    2961             :     dest[i * stride] =
    2962             :         clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 5));
    2963             :   }
    2964             : }
    2965             : 
    // 32-point 1-D inverse transform for DPCM intra: transforms `input` into 32
    // values and adds them onto 32 destination samples spaced `stride` apart.
    //
    //   input   - 32 coefficients.
    //   stride  - distance between consecutive destination samples.
    //   tx_type - index into the 1-D transform table; must be < TX_TYPES_1D.
    //   dest    - first destination sample; updated in place with clipping.
    2966             : void av1_dpcm_inv_txfm_add_32_c(const tran_low_t *input, int stride,
    2967             :                                 TX_TYPE_1D tx_type, uint8_t *dest) {
    2968             :   assert(tx_type < TX_TYPES_1D);
                         // At this length entries 1 and 2 use ihalfright32_c instead of an
                         // ADST routine.
    2969             :   static const transform_1d IHT[] = { aom_idct32_c, ihalfright32_c,
    2970             :                                       ihalfright32_c, iidtx32_c };
    2971             :   const transform_1d inv_tx = IHT[tx_type];
    2972             :   tran_low_t out[32];
    2973             :   inv_tx(input, out);
    2974             :   for (int i = 0; i < 32; ++i) {
                           // No extra Sqrt2 scaling at this length; only the rounding by 4 bits.
    2975             :     dest[i * stride] =
    2976             :         clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 4));
    2977             :   }
    2978             : }
    2979             : 
    // Returns the DPCM inverse-transform-and-add routine for a 1-D transform of
    // `tx_length` points (4, 8, 16 or 32). Any other length trips assert(0) and,
    // where assert is compiled out, yields NULL - callers must not call through
    // the result in that case.
    2980             : dpcm_inv_txfm_add_func av1_get_dpcm_inv_txfm_add_func(int tx_length) {
    2981             :   switch (tx_length) {
    2982             :     case 4: return av1_dpcm_inv_txfm_add_4_c;
    2983             :     case 8: return av1_dpcm_inv_txfm_add_8_c;
    2984             :     case 16: return av1_dpcm_inv_txfm_add_16_c;
    2985             :     case 32:
    2986             :       return av1_dpcm_inv_txfm_add_32_c;
    2987             :     // TODO(huisu): add support for TX_64X64.
    2988             :     default: assert(0); return NULL;
    2989             :   }
    2990             : }
    2991             : 
    2992             : #if CONFIG_HIGHBITDEPTH
    // High-bit-depth 4-point 1-D inverse transform for DPCM intra: transforms
    // `input` into four values and adds them onto four 16-bit destination
    // samples spaced `stride` apart.
    //
    //   input   - 4 coefficients.
    //   stride  - distance between consecutive destination samples.
    //   tx_type - index into the 1-D transform table; must be < TX_TYPES_1D.
    //   bd      - bit depth, forwarded to the transform and to the clipping add.
    //   dest    - first destination sample; updated in place with clipping.
    2993             : void av1_hbd_dpcm_inv_txfm_add_4_c(const tran_low_t *input, int stride,
    2994             :                                    TX_TYPE_1D tx_type, int bd, uint16_t *dest) {
    2995             :   assert(tx_type < TX_TYPES_1D);
                         // Entries 1 and 2 deliberately share the same ADST routine.
    2996             :   static const highbd_transform_1d IHT[] = { aom_highbd_idct4_c,
    2997             :                                              aom_highbd_iadst4_c,
    2998             :                                              aom_highbd_iadst4_c,
    2999             :                                              highbd_iidtx4_c };
    3000             :   const highbd_transform_1d inv_tx = IHT[tx_type];
    3001             :   tran_low_t out[4];
    3002             :   inv_tx(input, out, bd);
    3003             :   for (int i = 0; i < 4; ++i) {
                           // Each output is multiplied by Sqrt2 and passed through
                           // dct_const_round_shift before the final rounding by 4 bits.
    3004             :     out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
    3005             :     dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
    3006             :                                              ROUND_POWER_OF_TWO(out[i], 4), bd);
    3007             :   }
    3008             : }
    3009             : 
    // High-bit-depth 8-point 1-D inverse transform for DPCM intra: transforms
    // `input` into eight values and adds them onto eight 16-bit destination
    // samples spaced `stride` apart.
    //
    //   input   - 8 coefficients.
    //   stride  - distance between consecutive destination samples.
    //   tx_type - index into the 1-D transform table; must be < TX_TYPES_1D.
    //   bd      - bit depth, forwarded to the transform and to the clipping add.
    //   dest    - first destination sample; updated in place with clipping.
    3010             : void av1_hbd_dpcm_inv_txfm_add_8_c(const tran_low_t *input, int stride,
    3011             :                                    TX_TYPE_1D tx_type, int bd, uint16_t *dest) {
                         // Entries 1 and 2 deliberately share the same ADST routine.
    3012             :   static const highbd_transform_1d IHT[] = { aom_highbd_idct8_c,
    3013             :                                              aom_highbd_iadst8_c,
    3014             :                                              aom_highbd_iadst8_c,
    3015             :                                              highbd_iidtx8_c };
    3016             :   assert(tx_type < TX_TYPES_1D);
    3017             :   const highbd_transform_1d inv_tx = IHT[tx_type];
    3018             :   tran_low_t out[8];
    3019             :   inv_tx(input, out, bd);
    3020             :   for (int i = 0; i < 8; ++i) {
                           // No extra Sqrt2 scaling at this length; only the rounding by 4 bits.
    3021             :     dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
    3022             :                                              ROUND_POWER_OF_TWO(out[i], 4), bd);
    3023             :   }
    3024             : }
    3025             : 
    // High-bit-depth 16-point 1-D inverse transform for DPCM intra: transforms
    // `input` into sixteen values and adds them onto sixteen 16-bit destination
    // samples spaced `stride` apart.
    //
    //   input   - 16 coefficients.
    //   stride  - distance between consecutive destination samples.
    //   tx_type - index into the 1-D transform table; must be < TX_TYPES_1D.
    //   bd      - bit depth, forwarded to the transform and to the clipping add.
    //   dest    - first destination sample; updated in place with clipping.
    3026             : void av1_hbd_dpcm_inv_txfm_add_16_c(const tran_low_t *input, int stride,
    3027             :                                     TX_TYPE_1D tx_type, int bd,
    3028             :                                     uint16_t *dest) {
    3029             :   assert(tx_type < TX_TYPES_1D);
                         // Entries 1 and 2 deliberately share the same ADST routine.
    3030             :   static const highbd_transform_1d IHT[] = { aom_highbd_idct16_c,
    3031             :                                              aom_highbd_iadst16_c,
    3032             :                                              aom_highbd_iadst16_c,
    3033             :                                              highbd_iidtx16_c };
    3034             :   const highbd_transform_1d inv_tx = IHT[tx_type];
    3035             :   tran_low_t out[16];
    3036             :   inv_tx(input, out, bd);
    3037             :   for (int i = 0; i < 16; ++i) {
                           // Each output is multiplied by Sqrt2 and passed through
                           // dct_const_round_shift; the final rounding here is by 5 bits.
    3038             :     out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
    3039             :     dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
    3040             :                                              ROUND_POWER_OF_TWO(out[i], 5), bd);
    3041             :   }
    3042             : }
    3043             : 
    // High-bit-depth 32-point 1-D inverse transform for DPCM intra: transforms
    // `input` into 32 values and adds them onto 32 16-bit destination samples
    // spaced `stride` apart.
    //
    //   input   - 32 coefficients.
    //   stride  - distance between consecutive destination samples.
    //   tx_type - index into the 1-D transform table; must be < TX_TYPES_1D.
    //   bd      - bit depth, forwarded to the transform and to the clipping add.
    //   dest    - first destination sample; updated in place with clipping.
    3044             : void av1_hbd_dpcm_inv_txfm_add_32_c(const tran_low_t *input, int stride,
    3045             :                                     TX_TYPE_1D tx_type, int bd,
    3046             :                                     uint16_t *dest) {
    3047             :   assert(tx_type < TX_TYPES_1D);
                         // At this length entries 1 and 2 use highbd_ihalfright32_c instead
                         // of an ADST routine.
    3048             :   static const highbd_transform_1d IHT[] = { aom_highbd_idct32_c,
    3049             :                                              highbd_ihalfright32_c,
    3050             :                                              highbd_ihalfright32_c,
    3051             :                                              highbd_iidtx32_c };
    3052             :   const highbd_transform_1d inv_tx = IHT[tx_type];
    3053             :   tran_low_t out[32];
    3054             :   inv_tx(input, out, bd);
    3055             :   for (int i = 0; i < 32; ++i) {
                           // No extra Sqrt2 scaling at this length; only the rounding by 4 bits.
    3056             :     dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
    3057             :                                              ROUND_POWER_OF_TWO(out[i], 4), bd);
    3058             :   }
    3059             : }
    3060             : 
    // Returns the high-bit-depth DPCM inverse-transform-and-add routine for a
    // 1-D transform of `tx_length` points (4, 8, 16 or 32). Any other length
    // trips assert(0) and, where assert is compiled out, yields NULL - callers
    // must not call through the result in that case.
    3061             : hbd_dpcm_inv_txfm_add_func av1_get_hbd_dpcm_inv_txfm_add_func(int tx_length) {
    3062             :   switch (tx_length) {
    3063             :     case 4: return av1_hbd_dpcm_inv_txfm_add_4_c;
    3064             :     case 8: return av1_hbd_dpcm_inv_txfm_add_8_c;
    3065             :     case 16: return av1_hbd_dpcm_inv_txfm_add_16_c;
    3066             :     case 32:
    3067             :       return av1_hbd_dpcm_inv_txfm_add_32_c;
    3068             :     // TODO(huisu): add support for TX_64X64.
    3069             :     default: assert(0); return NULL;
    3070             :   }
    3071             : }
    3072             : #endif  // CONFIG_HIGHBITDEPTH
    3073             : #endif  // CONFIG_DPCM_INTRA

Generated by: LCOV version 1.13