/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>

#include "./aom_dsp_rtcd.h"
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"

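// Larger transforms accumulate more fixed-point gain, so their dequantized
// coefficients are scaled down by an extra shift to stay within the range of
// tran_low_t. This returns that extra shift: 1 for 32x32, 2 for 64x64, and 0
// for everything smaller.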
int av1_get_tx_scale(const TX_SIZE tx_size) {
  if (txsize_sqr_up_map[tx_size] == TX_32X32) return 1;
#if CONFIG_TX64X64
  else if (txsize_sqr_up_map[tx_size] == TX_64X64)
    return 2;
#endif  // CONFIG_TX64X64
  else
    return 0;
}

// NOTE: The implementations of all inverses need to be aware of the fact
// that input and output could be the same buffer.

#if CONFIG_EXT_TX
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 4; ++i)
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
}

static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
}

static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 16; ++i)
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
}

static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
}

#if CONFIG_TX64X64
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 64; ++i)
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
}
#endif  // CONFIG_TX64X64
#endif  // CONFIG_EXT_TX

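// There is no true 32-point ADST in this codebase, so ihalfright32_c stands
// in for it: the high-frequency half of the input is simply rescaled into
// the first 16 outputs, while the low-frequency half goes through a 16-point
// DCT into the remaining 16 outputs.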
// For use in lieu of ADST
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2)
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
  aom_idct16_c(inputhalf, output + 16);
  // Note overall scaling factor is 4 times orthogonal
}

#if CONFIG_TX64X64
static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
  av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
  av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

// For use in lieu of ADST
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[32];
  // Multiply input by sqrt(2)
  for (i = 0; i < 32; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
  for (i = 0; i < 32; ++i) {
    output[i] = (tran_low_t)dct_const_round_shift(input[32 + i] * 4 * Sqrt2);
  }
  aom_idct32_c(inputhalf, output + 32);
  // Note overall scaling factor is 4 * sqrt(2) times orthogonal
}
#endif  // CONFIG_TX64X64

#if CONFIG_HIGHBITDEPTH
#if CONFIG_EXT_TX
// TODO(sarahparker) these functions will be removed once the highbitdepth
// codepath works properly for rectangular transforms. They have almost
// identical versions in av1_inv_txfm1d.c, but those are currently only
// being used for square transforms.
static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output,
                            int bd) {
  int i;
  for (i = 0; i < 4; ++i)
    output[i] = HIGHBD_WRAPLOW(dct_const_round_shift(input[i] * Sqrt2), bd);
}

static void highbd_iidtx8_c(const tran_low_t *input, tran_low_t *output,
                            int bd) {
  int i;
  (void)bd;
  for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
}

static void highbd_iidtx16_c(const tran_low_t *input, tran_low_t *output,
                             int bd) {
  int i;
  for (i = 0; i < 16; ++i)
    output[i] = HIGHBD_WRAPLOW(dct_const_round_shift(input[i] * 2 * Sqrt2), bd);
}

static void highbd_iidtx32_c(const tran_low_t *input, tran_low_t *output,
                             int bd) {
  int i;
  (void)bd;
  for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
}
#endif  // CONFIG_EXT_TX

static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output,
                                  int bd) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2)
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = HIGHBD_WRAPLOW(dct_const_round_shift(input[i] * Sqrt2), bd);
  }
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
  aom_highbd_idct16_c(inputhalf, output + 16, bd);
  // Note overall scaling factor is 4 times orthogonal
}

#if CONFIG_EXT_TX
#if CONFIG_TX64X64
static void highbd_iidtx64_c(const tran_low_t *input, tran_low_t *output,
                             int bd) {
  int i;
  for (i = 0; i < 64; ++i)
    output[i] = HIGHBD_WRAPLOW(dct_const_round_shift(input[i] * 4 * Sqrt2), bd);
}
#endif  // CONFIG_TX64X64
#endif  // CONFIG_EXT_TX

#if CONFIG_TX64X64
// For use in lieu of ADST
static void highbd_ihalfright64_c(const tran_low_t *input, tran_low_t *output,
                                  int bd) {
  int i;
  tran_low_t inputhalf[32];
  // Multiply input by sqrt(2)
  for (i = 0; i < 32; ++i) {
    inputhalf[i] = HIGHBD_WRAPLOW(dct_const_round_shift(input[i] * Sqrt2), bd);
  }
  for (i = 0; i < 32; ++i) {
    output[i] =
        HIGHBD_WRAPLOW(dct_const_round_shift(input[32 + i] * 4 * Sqrt2), bd);
  }
  aom_highbd_idct32_c(inputhalf, output + 32, bd);
  // Note overall scaling factor is 4 * sqrt(2) times orthogonal
}

static void highbd_idct64_col_c(const tran_low_t *input, tran_low_t *output,
                                int bd) {
  int32_t in[64], out[64];
  int i;
  (void)bd;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
  av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

static void highbd_idct64_row_c(const tran_low_t *input, tran_low_t *output,
                                int bd) {
  int32_t in[64], out[64];
  int i;
  (void)bd;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
  av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
#endif  // CONFIG_TX64X64
#endif  // CONFIG_HIGHBITDEPTH

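// The final right-shift below is smaller for the 32- and 64-wide blocks
// because their dequantized coefficients have already been downscaled by the
// extra bits reported by av1_get_tx_scale.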
// Inverse identity transform and add.
#if CONFIG_EXT_TX
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int bs, int tx_type) {
  int r, c;
  const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
  if (tx_type == IDTX) {
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
      dest += stride;
      input += bs;
    }
  }
}
#endif  // CONFIG_EXT_TX

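// Rather than physically flipping the residual block, FLIPUD_PTR points the
// buffer at its last row and negates the stride, so a normal top-to-bottom
// walk visits the rows in reverse order.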
#define FLIPUD_PTR(dest, stride, size)       \
  do {                                       \
    (dest) = (dest) + ((size)-1) * (stride); \
    (stride) = -(stride);                    \
  } while (0)

#if CONFIG_EXT_TX
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, int tx_type, int sizey,
                               int sizex) {
  // Note that the transpose of src will be added to dst. In order to LR
  // flip the addends (in dst coordinates), we UD flip the src. To UD flip
  // the addends, we UD flip the dst.
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST:
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    case FLIPADST_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    default: assert(0); break;
  }
}
#endif  // CONFIG_EXT_TX

#if CONFIG_HIGHBITDEPTH
#if CONFIG_EXT_TX
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
                                  int stride, int bs, int tx_type, int bd) {
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  if (tx_type == IDTX) {
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
        dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
      dest += stride;
      input += bs;
    }
  }
}

static void maybe_flip_strides16(uint16_t **dst, int *dstride, tran_low_t **src,
                                 int *sstride, int tx_type, int sizey,
                                 int sizex) {
  // Note that the transpose of src will be added to dst. In order to LR
  // flip the addends (in dst coordinates), we UD flip the src. To UD flip
  // the addends, we UD flip the dst.
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST:
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    case FLIPADST_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    default: assert(0); break;
  }
}
#endif  // CONFIG_EXT_TX
#endif  // CONFIG_HIGHBITDEPTH

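// Each av1_iht* function below follows the same pattern: run the 1-D row
// transforms, transpose, run the 1-D column transforms, optionally flip the
// traversal for the FLIPADST types, then round and add the residual to the
// prediction. The final ROUND_POWER_OF_TWO shift undoes the fixed-point gain
// accumulated by the two 1-D passes.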
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
  static const transform_2d IHT_4[] = {
    { aom_idct4_c, aom_idct4_c },    // DCT_DCT = 0
    { aom_iadst4_c, aom_idct4_c },   // ADST_DCT = 1
    { aom_idct4_c, aom_iadst4_c },   // DCT_ADST = 2
    { aom_iadst4_c, aom_iadst4_c },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },          // IDTX
    { aom_idct4_c, iidtx4_c },       // V_DCT
    { iidtx4_c, aom_idct4_c },       // H_DCT
    { aom_iadst4_c, iidtx4_c },      // V_ADST
    { iidtx4_c, aom_iadst4_c },      // H_ADST
    { aom_iadst4_c, iidtx4_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst4_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  int i, j;
  tran_low_t tmp[4][4];
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;

  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
    IHT_4[tx_type].rows(input, out[i]);
    input += 4;
  }

  // transpose
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
    IHT_4[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
#endif

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
    for (j = 0; j < 4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
    }
  }
}

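// For the 1:2 rectangular transforms below, each row output is rescaled by
// Sqrt2 so that the overall 2-D gain stays a power of two; the 1:4 shapes
// (4x16, 16x4, 8x32, 32x8) already have a power-of-two gain and skip the
// correction.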
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
  static const transform_2d IHT_4x8[] = {
    { aom_idct8_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct4_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },          // IDTX
    { aom_idct8_c, iidtx4_c },       // V_DCT
    { iidtx8_c, aom_idct4_c },       // H_DCT
    { aom_iadst8_c, iidtx4_c },      // V_ADST
    { iidtx8_c, aom_iadst4_c },      // H_ADST
    { aom_iadst8_c, iidtx4_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst4_c },      // H_FLIPADST
#endif
  };

  const int n = 4;
  const int n2 = 8;
  int i, j;
  tran_low_t out[4][8], tmp[4][8], outtmp[4];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_4x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_4x8[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
  static const transform_2d IHT_8x4[] = {
    { aom_idct4_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct8_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },          // IDTX
    { aom_idct4_c, iidtx8_c },       // V_DCT
    { iidtx4_c, aom_idct8_c },       // H_DCT
    { aom_iadst4_c, iidtx8_c },      // V_ADST
    { iidtx4_c, aom_iadst8_c },      // H_ADST
    { aom_iadst4_c, iidtx8_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst8_c },      // H_FLIPADST
#endif
  };
  const int n = 4;
  const int n2 = 8;

  int i, j;
  tran_low_t out[8][4], tmp[8][4], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_8x4[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_8x4[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                          int tx_type) {
  static const transform_2d IHT_4x16[] = {
    { aom_idct16_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct4_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx4_c },          // IDTX
    { aom_idct16_c, iidtx4_c },       // V_DCT
    { iidtx16_c, aom_idct4_c },       // H_DCT
    { aom_iadst16_c, iidtx4_c },      // V_ADST
    { iidtx16_c, aom_iadst4_c },      // H_ADST
    { aom_iadst16_c, iidtx4_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst4_c },      // H_FLIPADST
#endif
  };

  const int n = 4;
  const int n4 = 16;
  int i, j;
  tran_low_t out[4][16], tmp[4][16], outtmp[4];
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
    IHT_4x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) IHT_4x16[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                          int tx_type) {
  static const transform_2d IHT_16x4[] = {
    { aom_idct4_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct16_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx16_c },          // IDTX
    { aom_idct4_c, iidtx16_c },       // V_DCT
    { iidtx4_c, aom_idct16_c },       // H_DCT
    { aom_iadst4_c, iidtx16_c },      // V_ADST
    { iidtx4_c, aom_iadst16_c },      // H_ADST
    { aom_iadst4_c, iidtx16_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst16_c },      // H_FLIPADST
#endif
  };
  const int n = 4;
  const int n4 = 16;

  int i, j;
  tran_low_t out[16][4], tmp[16][4], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x4[tx_type].rows(input, outtmp);
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
    input += n4;
  }

  // inverse transform column vectors
  for (i = 0; i < n4; ++i) IHT_16x4[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
  static const transform_2d IHT_8x16[] = {
    { aom_idct16_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct8_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },          // IDTX
    { aom_idct16_c, iidtx8_c },       // V_DCT
    { iidtx16_c, aom_idct8_c },       // H_DCT
    { aom_iadst16_c, iidtx8_c },      // V_ADST
    { iidtx16_c, aom_iadst8_c },      // H_ADST
    { aom_iadst16_c, iidtx8_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst8_c },      // H_FLIPADST
#endif
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
  tran_low_t out[8][16], tmp[8][16], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_8x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_8x16[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
  static const transform_2d IHT_16x8[] = {
    { aom_idct8_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct16_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },          // IDTX
    { aom_idct8_c, iidtx16_c },       // V_DCT
    { iidtx8_c, aom_idct16_c },       // H_DCT
    { aom_iadst8_c, iidtx16_c },      // V_ADST
    { iidtx8_c, aom_iadst16_c },      // H_ADST
    { aom_iadst8_c, iidtx16_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst16_c },      // H_FLIPADST
#endif
  };
  const int n = 8;
  const int n2 = 16;

  int i, j;
  tran_low_t out[16][8], tmp[16][8], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_16x8[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
  static const transform_2d IHT_8x32[] = {
    { aom_idct32_c, aom_idct8_c },     // DCT_DCT
    { ihalfright32_c, aom_idct8_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx8_c },           // IDTX
    { aom_idct32_c, iidtx8_c },        // V_DCT
    { iidtx32_c, aom_idct8_c },        // H_DCT
    { ihalfright32_c, iidtx8_c },      // V_ADST
    { iidtx32_c, aom_iadst8_c },       // H_ADST
    { ihalfright32_c, iidtx8_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst8_c },       // H_FLIPADST
#endif
  };

  const int n = 8;
  const int n4 = 32;
  int i, j;
  tran_low_t out[8][32], tmp[8][32], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
    IHT_8x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) IHT_8x32[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
  static const transform_2d IHT_32x8[] = {
    { aom_idct8_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst8_c, aom_idct32_c },    // ADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx32_c },           // IDTX
    { aom_idct8_c, iidtx32_c },        // V_DCT
    { iidtx8_c, aom_idct32_c },        // H_DCT
    { aom_iadst8_c, iidtx32_c },       // V_ADST
    { iidtx8_c, ihalfright32_c },      // H_ADST
    { aom_iadst8_c, iidtx32_c },       // V_FLIPADST
    { iidtx8_c, ihalfright32_c },      // H_FLIPADST
#endif
  };
  const int n = 8;
  const int n4 = 32;

  int i, j;
  tran_low_t out[32][8], tmp[32][8], outtmp[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
    input += n4;
  }

  // inverse transform column vectors
  for (i = 0; i < n4; ++i) IHT_32x8[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
  static const transform_2d IHT_16x32[] = {
    { aom_idct32_c, aom_idct16_c },     // DCT_DCT
    { ihalfright32_c, aom_idct16_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx16_c },           // IDTX
    { aom_idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, aom_idct16_c },        // H_DCT
    { ihalfright32_c, iidtx16_c },      // V_ADST
    { iidtx32_c, aom_iadst16_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst16_c },       // H_FLIPADST
#endif
  };

  const int n = 16;
  const int n2 = 32;
  int i, j;
  tran_low_t out[16][32], tmp[16][32], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_16x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_16x32[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
  static const transform_2d IHT_32x16[] = {
    { aom_idct16_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst16_c, aom_idct32_c },    // ADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx32_c },           // IDTX
    { aom_idct16_c, iidtx32_c },        // V_DCT
    { iidtx16_c, aom_idct32_c },        // H_DCT
    { aom_iadst16_c, iidtx32_c },       // V_ADST
    { iidtx16_c, ihalfright32_c },      // H_ADST
    { aom_iadst16_c, iidtx32_c },       // V_FLIPADST
    { iidtx16_c, ihalfright32_c },      // H_FLIPADST
#endif
  };
  const int n = 16;
  const int n2 = 32;

  int i, j;
  tran_low_t out[32][16], tmp[32][16], outtmp[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_32x16[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
  static const transform_2d IHT_8[] = {
    { aom_idct8_c, aom_idct8_c },    // DCT_DCT = 0
    { aom_iadst8_c, aom_idct8_c },   // ADST_DCT = 1
    { aom_idct8_c, aom_iadst8_c },   // DCT_ADST = 2
    { aom_iadst8_c, aom_iadst8_c },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx8_c },          // IDTX
    { aom_idct8_c, iidtx8_c },       // V_DCT
    { iidtx8_c, aom_idct8_c },       // H_DCT
    { aom_iadst8_c, iidtx8_c },      // V_ADST
    { iidtx8_c, aom_iadst8_c },      // H_ADST
    { aom_iadst8_c, iidtx8_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst8_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  int i, j;
  tran_low_t tmp[8][8];
  tran_low_t out[8][8];
  tran_low_t *outp = &out[0][0];
  int outstride = 8;

  // inverse transform row vectors
  for (i = 0; i < 8; ++i) {
    IHT_8[tx_type].rows(input, out[i]);
    input += 8;
  }

  // transpose
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 8; ++i) {
    IHT_8[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
#endif

  // Sum with the destination
  for (i = 0; i < 8; ++i) {
    for (j = 0; j < 8; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
  static const transform_2d IHT_16[] = {
    { aom_idct16_c, aom_idct16_c },    // DCT_DCT = 0
    { aom_iadst16_c, aom_idct16_c },   // ADST_DCT = 1
    { aom_idct16_c, aom_iadst16_c },   // DCT_ADST = 2
    { aom_iadst16_c, aom_iadst16_c },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx16_c },          // IDTX
    { aom_idct16_c, iidtx16_c },       // V_DCT
    { iidtx16_c, aom_idct16_c },       // H_DCT
    { aom_iadst16_c, iidtx16_c },      // V_ADST
    { iidtx16_c, aom_iadst16_c },      // H_ADST
    { aom_iadst16_c, iidtx16_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst16_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  int i, j;
  tran_low_t tmp[16][16];
  tran_low_t out[16][16];
  tran_low_t *outp = &out[0][0];
  int outstride = 16;

  // inverse transform row vectors
  for (i = 0; i < 16; ++i) {
    IHT_16[tx_type].rows(input, out[i]);
    input += 16;
  }

  // transpose
  for (i = 0; i < 16; i++) {
    for (j = 0; j < 16; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 16; ++i) {
    IHT_16[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
#endif

  // Sum with the destination
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 16; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

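// At 32x32 the baseline codec only uses DCT_DCT (handled by idct32x32_add
// further below), so the full hybrid 32x32 transform is only built under
// CONFIG_EXT_TX.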
#if CONFIG_EXT_TX
void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             int tx_type) {
  static const transform_2d IHT_32[] = {
    { aom_idct32_c, aom_idct32_c },      // DCT_DCT
    { ihalfright32_c, aom_idct32_c },    // ADST_DCT
    { aom_idct32_c, ihalfright32_c },    // DCT_ADST
    { ihalfright32_c, ihalfright32_c },  // ADST_ADST
    { ihalfright32_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct32_c, ihalfright32_c },    // DCT_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // ADST_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx32_c },            // IDTX
    { aom_idct32_c, iidtx32_c },         // V_DCT
    { iidtx32_c, aom_idct32_c },         // H_DCT
    { ihalfright32_c, iidtx32_c },       // V_ADST
    { iidtx32_c, ihalfright32_c },       // H_ADST
    { ihalfright32_c, iidtx32_c },       // V_FLIPADST
    { iidtx32_c, ihalfright32_c },       // H_FLIPADST
  };

  int i, j;
  tran_low_t tmp[32][32];
  tran_low_t out[32][32];
  tran_low_t *outp = &out[0][0];
  int outstride = 32;

  // inverse transform row vectors
  for (i = 0; i < 32; ++i) {
    IHT_32[tx_type].rows(input, out[i]);
    input += 32;
  }

  // transpose
  for (i = 0; i < 32; i++) {
    for (j = 0; j < 32; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 32; ++i) {
    IHT_32[tx_type].cols(tmp[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32, 32);

  // Sum with the destination
  for (i = 0; i < 32; ++i) {
    for (j = 0; j < 32; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
#endif  // CONFIG_EXT_TX

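// The 64x64 row outputs are halved before the column pass to keep the
// intermediate values within range, which is why the final rounding shift
// below is 5 rather than the 6 used by the other large transforms.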
#if CONFIG_TX64X64
void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             int tx_type) {
  static const transform_2d IHT_64[] = {
    { idct64_col_c, idct64_row_c },      // DCT_DCT
    { ihalfright64_c, idct64_row_c },    // ADST_DCT
    { idct64_col_c, ihalfright64_c },    // DCT_ADST
    { ihalfright64_c, ihalfright64_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright64_c, idct64_row_c },    // FLIPADST_DCT
    { idct64_col_c, ihalfright64_c },    // DCT_FLIPADST
    { ihalfright64_c, ihalfright64_c },  // FLIPADST_FLIPADST
    { ihalfright64_c, ihalfright64_c },  // ADST_FLIPADST
    { ihalfright64_c, ihalfright64_c },  // FLIPADST_ADST
    { iidtx64_c, iidtx64_c },            // IDTX
    { idct64_col_c, iidtx64_c },         // V_DCT
    { iidtx64_c, idct64_row_c },         // H_DCT
    { ihalfright64_c, iidtx64_c },       // V_ADST
    { iidtx64_c, ihalfright64_c },       // H_ADST
    { ihalfright64_c, iidtx64_c },       // V_FLIPADST
    { iidtx64_c, ihalfright64_c },       // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  int i, j;
  tran_low_t tmp[64][64];
  tran_low_t out[64][64];
  tran_low_t *outp = &out[0][0];
  int outstride = 64;

  // inverse transform row vectors
  for (i = 0; i < 64; ++i) {
    IHT_64[tx_type].rows(input, out[i]);
    for (j = 0; j < 64; ++j) out[i][j] = ROUND_POWER_OF_TWO(out[i][j], 1);
    input += 64;
  }

  // transpose
  for (i = 0; i < 64; i++) {
    for (j = 0; j < 64; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 64; ++i) {
    IHT_64[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 64, 64);
#endif  // CONFIG_EXT_TX

  // Sum with the destination
  for (i = 0; i < 64; ++i) {
    for (j = 0; j < 64; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}
#endif  // CONFIG_TX64X64

// idct
void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob > 1)
    aom_idct4x4_16_add(input, dest, stride);
  else
    aom_idct4x4_1_add(input, dest, stride);
}

void av1_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob > 1)
    aom_iwht4x4_16_add(input, dest, stride);
  else
    aom_iwht4x4_1_add(input, dest, stride);
}

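// The idct*_add wrappers below pick a reduced-complexity inverse based on
// eob (the index of the last nonzero coefficient): a DC-only path, a partial
// transform covering just the low-frequency corner, or the full transform.
// The eob thresholds are adaptive under CONFIG_ADAPT_SCAN.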
static void idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
                        const INV_TXFM_PARAM *param) {
  // If dc is 1, then input[0] is the reconstructed value and does not need
  // dequantization. Also, when dc is 1, dc is counted in eobs, namely
  // eobs >= 1.

  // The calculation can be simplified if there are not many non-zero dct
  // coefficients. Use eobs to decide what to do.
  // TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
  // Combine that with code here.
#if CONFIG_ADAPT_SCAN
  const int16_t half = param->eob_threshold[0];
#else
  const int16_t half = 12;
#endif

  const int eob = param->eob;
  if (eob == 1)
    // DC only DCT coefficient
    aom_idct8x8_1_add(input, dest, stride);
  else if (eob <= half)
    aom_idct8x8_12_add(input, dest, stride);
  else
    aom_idct8x8_64_add(input, dest, stride);
}

static void idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
                          const INV_TXFM_PARAM *param) {
  // The calculation can be simplified if there are not many non-zero dct
  // coefficients. Use eobs to separate different cases.
#if CONFIG_ADAPT_SCAN
  const int16_t half = param->eob_threshold[0];
  const int16_t quarter = param->eob_threshold[1];
#else
  const int16_t half = 38;
  const int16_t quarter = 10;
#endif

  const int eob = param->eob;
  if (eob == 1) /* DC only DCT coefficient. */
    aom_idct16x16_1_add(input, dest, stride);
  else if (eob <= quarter)
    aom_idct16x16_10_add(input, dest, stride);
  else if (eob <= half)
    aom_idct16x16_38_add(input, dest, stride);
  else
    aom_idct16x16_256_add(input, dest, stride);
}

static void idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                          const INV_TXFM_PARAM *param) {
#if CONFIG_ADAPT_SCAN
  const int16_t half = param->eob_threshold[0];
  const int16_t quarter = param->eob_threshold[1];
#else
  const int16_t half = 135;
  const int16_t quarter = 34;
#endif

  const int eob = param->eob;
  if (eob == 1)
    aom_idct32x32_1_add(input, dest, stride);
  else if (eob <= quarter)
    // non-zero coeff only in upper-left 8x8
    aom_idct32x32_34_add(input, dest, stride);
  else if (eob <= half)
    // non-zero coeff only in upper-left 16x16
    aom_idct32x32_135_add(input, dest, stride);
  else
    aom_idct32x32_1024_add(input, dest, stride);
}

#if CONFIG_TX64X64
static void idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
                          const INV_TXFM_PARAM *param) {
  (void)param;
  av1_iht64x64_4096_add(input, dest, stride, DCT_DCT);
}
#endif  // CONFIG_TX64X64

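// The 2x2 inverse is a plain 2x2 Walsh-Hadamard butterfly (sums and
// differences of the four coefficients, after undoing UNIT_QUANT_SHIFT)
// followed by a >> 2 to undo the forward gain.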
#if CONFIG_CHROMA_2X2
static void inv_txfm_add_2x2(const tran_low_t *input, uint8_t *dest, int stride,
                             int eob, TX_TYPE tx_type, int lossless) {
  tran_high_t a1 = input[0] >> UNIT_QUANT_SHIFT;
  tran_high_t b1 = input[1] >> UNIT_QUANT_SHIFT;
  tran_high_t c1 = input[2] >> UNIT_QUANT_SHIFT;
  tran_high_t d1 = input[3] >> UNIT_QUANT_SHIFT;

  tran_high_t a2 = a1 + c1;
  tran_high_t b2 = b1 + d1;
  tran_high_t c2 = a1 - c1;
  tran_high_t d2 = b1 - d1;

  (void)tx_type;
  (void)lossless;
  (void)eob;

  a1 = (a2 + b2) >> 2;
  b1 = (a2 - b2) >> 2;
  c1 = (c2 + d2) >> 2;
  d1 = (c2 - d2) >> 2;

  dest[0] = clip_pixel_add(dest[0], WRAPLOW(a1));
  dest[1] = clip_pixel_add(dest[1], WRAPLOW(b1));
  dest[stride] = clip_pixel_add(dest[stride], WRAPLOW(c1));
  dest[stride + 1] = clip_pixel_add(dest[stride + 1], WRAPLOW(d1));
}
#endif

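// Dispatch for 4x4: lossless blocks always use the Walsh-Hadamard inverse,
// DCT_DCT goes through the eob-aware wrapper, and the transform types that
// have no SIMD implementation fall back to the C version explicitly.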
static void inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
                             int eob, TX_TYPE tx_type, int lossless) {
  if (lossless) {
    assert(tx_type == DCT_DCT);
    av1_iwht4x4_add(input, dest, stride, eob);
    return;
  }

  switch (tx_type) {
    case DCT_DCT: av1_idct4x4_add(input, dest, stride, eob); break;
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST: av1_iht4x4_16_add(input, dest, stride, tx_type); break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST: av1_iht4x4_16_add(input, dest, stride, tx_type); break;
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_iht4x4_16_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 4, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}

static void inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
                             int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht4x8_32_add(input, dest, stride, tx_type);
}

static void inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride,
                             int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht8x4_32_add(input, dest, stride, tx_type);
}

// These will be used by the masked-tx experiment in the future.
#if CONFIG_RECT_TX && CONFIG_EXT_TX && CONFIG_RECT_TX_EXT
static void inv_txfm_add_4x16(const tran_low_t *input, uint8_t *dest,
                              int stride, int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht4x16_64_add(input, dest, stride, tx_type);
}

static void inv_txfm_add_16x4(const tran_low_t *input, uint8_t *dest,
                              int stride, int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht16x4_64_add(input, dest, stride, tx_type);
}

static void inv_txfm_add_8x32(const tran_low_t *input, uint8_t *dest,
                              int stride, int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht8x32_256_add(input, dest, stride, tx_type);
}

static void inv_txfm_add_32x8(const tran_low_t *input, uint8_t *dest,
                              int stride, int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht32x8_256_add(input, dest, stride, tx_type);
}
#endif

static void inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
                              int stride, int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht8x16_128_add(input, dest, stride, tx_type);
}

static void inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
                              int stride, int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht16x8_128_add(input, dest, stride, tx_type);
}

static void inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
                               int stride, int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht16x32_512_add(input, dest, stride, tx_type);
}

static void inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
                               int stride, int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht32x16_512_add(input, dest, stride, tx_type);
}

static void inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
                             const INV_TXFM_PARAM *param) {
  const TX_TYPE tx_type = param->tx_type;
  switch (tx_type) {
    case DCT_DCT: idct8x8_add(input, dest, stride, param); break;
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST: av1_iht8x8_64_add(input, dest, stride, tx_type); break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST: av1_iht8x8_64_add(input, dest, stride, tx_type); break;
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_iht8x8_64_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 8, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}

static void inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
                               int stride, const INV_TXFM_PARAM *param) {
  const TX_TYPE tx_type = param->tx_type;
  switch (tx_type) {
    case DCT_DCT: idct16x16_add(input, dest, stride, param); break;
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST: av1_iht16x16_256_add(input, dest, stride, tx_type); break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST: av1_iht16x16_256_add(input, dest, stride, tx_type); break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 16, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}

static void inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
                               int stride, const INV_TXFM_PARAM *param) {
  const TX_TYPE tx_type = param->tx_type;
  switch (tx_type) {
    case DCT_DCT: idct32x32_add(input, dest, stride, param); break;
#if CONFIG_EXT_TX
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      av1_iht32x32_1024_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 32, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}

#if CONFIG_TX64X64
static void inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
                               int stride, const INV_TXFM_PARAM *param) {
  const TX_TYPE tx_type = param->tx_type;
  switch (tx_type) {
    case DCT_DCT: idct64x64_add(input, dest, stride, param); break;
#if CONFIG_EXT_TX
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      av1_iht64x64_4096_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 64, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
#endif  // CONFIG_TX64X64

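// High-bitdepth variants: the destination is a uint16_t buffer reached via
// CONVERT_TO_SHORTPTR, and every add is clamped to the bit depth bd instead
// of to 8 bits.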
#if CONFIG_HIGHBITDEPTH
void av1_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
                                int stride, int tx_type, int bd) {
  static const highbd_transform_2d HIGH_IHT_4[] = {
    { aom_highbd_idct4_c, aom_highbd_idct4_c },    // DCT_DCT
    { aom_highbd_iadst4_c, aom_highbd_idct4_c },   // ADST_DCT
    { aom_highbd_idct4_c, aom_highbd_iadst4_c },   // DCT_ADST
    { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_highbd_iadst4_c, aom_highbd_idct4_c },   // FLIPADST_DCT
    { aom_highbd_idct4_c, aom_highbd_iadst4_c },   // DCT_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // ADST_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // FLIPADST_ADST
    { highbd_iidtx4_c, highbd_iidtx4_c },          // IDTX
    { aom_highbd_idct4_c, highbd_iidtx4_c },       // V_DCT
    { highbd_iidtx4_c, aom_highbd_idct4_c },       // H_DCT
    { aom_highbd_iadst4_c, highbd_iidtx4_c },      // V_ADST
    { highbd_iidtx4_c, aom_highbd_iadst4_c },      // H_ADST
    { aom_highbd_iadst4_c, highbd_iidtx4_c },      // V_FLIPADST
    { highbd_iidtx4_c, aom_highbd_iadst4_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t tmp[4][4];
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;

  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
    HIGH_IHT_4[tx_type].rows(input, out[i], bd);
    input += 4;
  }

  // transpose
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
    HIGH_IHT_4[tx_type].cols(tmp[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
#endif

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
    for (j = 0; j < 4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4), bd);
    }
  }
}

1602 0 : void av1_highbd_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest8,
1603 : int stride, int tx_type, int bd) {
1604 : static const highbd_transform_2d HIGH_IHT_4x8[] = {
1605 : { aom_highbd_idct8_c, aom_highbd_idct4_c }, // DCT_DCT
1606 : { aom_highbd_iadst8_c, aom_highbd_idct4_c }, // ADST_DCT
1607 : { aom_highbd_idct8_c, aom_highbd_iadst4_c }, // DCT_ADST
1608 : { aom_highbd_iadst8_c, aom_highbd_iadst4_c }, // ADST_ADST
1609 : #if CONFIG_EXT_TX
1610 : { aom_highbd_iadst8_c, aom_highbd_idct4_c }, // FLIPADST_DCT
1611 : { aom_highbd_idct8_c, aom_highbd_iadst4_c }, // DCT_FLIPADST
1612 : { aom_highbd_iadst8_c, aom_highbd_iadst4_c }, // FLIPADST_FLIPADST
1613 : { aom_highbd_iadst8_c, aom_highbd_iadst4_c }, // ADST_FLIPADST
1614 : { aom_highbd_iadst8_c, aom_highbd_iadst4_c }, // FLIPADST_ADST
1615 : { highbd_iidtx8_c, highbd_iidtx4_c }, // IDTX
1616 : { aom_highbd_idct8_c, highbd_iidtx4_c }, // V_DCT
1617 : { highbd_iidtx8_c, aom_highbd_idct4_c }, // H_DCT
1618 : { aom_highbd_iadst8_c, highbd_iidtx4_c }, // V_ADST
1619 : { highbd_iidtx8_c, aom_highbd_iadst4_c }, // H_ADST
1620 : { aom_highbd_iadst8_c, highbd_iidtx4_c }, // V_FLIPADST
1621 : { highbd_iidtx8_c, aom_highbd_iadst4_c }, // H_FLIPADST
1622 : #endif // CONFIG_EXT_TX
1623 : };
1624 0 : const int n = 4;
1625 0 : const int n2 = 8;
1626 :
1627 0 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1628 :
1629 : int i, j;
1630 : tran_low_t out[4][8], tmp[4][8], outtmp[4];
1631 0 : tran_low_t *outp = &out[0][0];
1632 0 : int outstride = n2;
1633 :
1634 : // inverse transform row vectors, and transpose
1635 0 : for (i = 0; i < n2; ++i) {
1636 0 : HIGH_IHT_4x8[tx_type].rows(input, outtmp, bd);
1637 0 : for (j = 0; j < n; ++j) {
1638 0 : tmp[j][i] = HIGHBD_WRAPLOW(dct_const_round_shift(outtmp[j] * Sqrt2), bd);
1639 : }
1640 0 : input += n;
1641 : }
1642 :
1643 : // inverse transform column vectors
1644 0 : for (i = 0; i < n; ++i) {
1645 0 : HIGH_IHT_4x8[tx_type].cols(tmp[i], out[i], bd);
1646 : }
1647 :
1648 : #if CONFIG_EXT_TX
1649 0 : maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
1650 : #endif // CONFIG_EXT_TX
1651 :
1652 : // Sum with the destination
1653 0 : for (i = 0; i < n2; ++i) {
1654 0 : for (j = 0; j < n; ++j) {
1655 0 : int d = i * stride + j;
1656 0 : int s = j * outstride + i;
1657 0 : dest[d] =
1658 0 : highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
1659 : }
1660 : }
1661 0 : }
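
/*
 * Editor's note on the Sqrt2 rescale in the row loop above: a separable
 * w x h inverse carries an overall sqrt(w * h) gain, which is irrational
 * when log2(w) + log2(h) is odd (4x8 here). Scaling each row output by
 * sqrt(2) in fixed point makes the total gain a power of two again, so
 * the final ROUND_POWER_OF_TWO(outp[s], 5) can undo it exactly. A
 * one-line sketch, assuming Sqrt2 is sqrt(2) in Q14 and
 * dct_const_round_shift() rounds by 14 bits, per the usual aom_dsp
 * conventions:
 */
#if 0  /* illustrative sketch only */
static INLINE tran_low_t example_sqrt2_rescale(tran_low_t x) {
  /* e.g. x = 100: (100 * Sqrt2 + (1 << 13)) >> 14 == 141 ~= 100*sqrt(2) */
  return (tran_low_t)dct_const_round_shift(x * Sqrt2);
}
#endif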
1662 :
1663 0 : void av1_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest8,
1664 : int stride, int tx_type, int bd) {
1665 : static const highbd_transform_2d HIGH_IHT_8x4[] = {
1666 : { aom_highbd_idct4_c, aom_highbd_idct8_c }, // DCT_DCT
1667 : { aom_highbd_iadst4_c, aom_highbd_idct8_c }, // ADST_DCT
1668 : { aom_highbd_idct4_c, aom_highbd_iadst8_c }, // DCT_ADST
1669 : { aom_highbd_iadst4_c, aom_highbd_iadst8_c }, // ADST_ADST
1670 : #if CONFIG_EXT_TX
1671 : { aom_highbd_iadst4_c, aom_highbd_idct8_c }, // FLIPADST_DCT
1672 : { aom_highbd_idct4_c, aom_highbd_iadst8_c }, // DCT_FLIPADST
1673 : { aom_highbd_iadst4_c, aom_highbd_iadst8_c }, // FLIPADST_FLIPADST
1674 : { aom_highbd_iadst4_c, aom_highbd_iadst8_c }, // ADST_FLIPADST
1675 : { aom_highbd_iadst4_c, aom_highbd_iadst8_c }, // FLIPADST_ADST
1676 : { highbd_iidtx4_c, highbd_iidtx8_c }, // IDTX
1677 : { aom_highbd_idct4_c, highbd_iidtx8_c }, // V_DCT
1678 : { highbd_iidtx4_c, aom_highbd_idct8_c }, // H_DCT
1679 : { aom_highbd_iadst4_c, highbd_iidtx8_c }, // V_ADST
1680 : { highbd_iidtx4_c, aom_highbd_iadst8_c }, // H_ADST
1681 : { aom_highbd_iadst4_c, highbd_iidtx8_c }, // V_FLIPADST
1682 : { highbd_iidtx4_c, aom_highbd_iadst8_c }, // H_FLIPADST
1683 : #endif // CONFIG_EXT_TX
1684 : };
1685 0 : const int n = 4;
1686 0 : const int n2 = 8;
1687 :
1688 0 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1689 :
1690 : int i, j;
1691 : tran_low_t out[8][4], tmp[8][4], outtmp[8];
1692 0 : tran_low_t *outp = &out[0][0];
1693 0 : int outstride = n;
1694 :
1695 : // inverse transform row vectors, and transpose
1696 0 : for (i = 0; i < n; ++i) {
1697 0 : HIGH_IHT_8x4[tx_type].rows(input, outtmp, bd);
1698 0 : for (j = 0; j < n2; ++j) {
1699 0 : tmp[j][i] = HIGHBD_WRAPLOW(dct_const_round_shift(outtmp[j] * Sqrt2), bd);
1700 : }
1701 0 : input += n2;
1702 : }
1703 :
1704 : // inverse transform column vectors
1705 0 : for (i = 0; i < n2; ++i) {
1706 0 : HIGH_IHT_8x4[tx_type].cols(tmp[i], out[i], bd);
1707 : }
1708 :
1709 : #if CONFIG_EXT_TX
1710 0 : maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
1711 : #endif // CONFIG_EXT_TX
1712 :
1713 : // Sum with the destination
1714 0 : for (i = 0; i < n; ++i) {
1715 0 : for (j = 0; j < n2; ++j) {
1716 0 : int d = i * stride + j;
1717 0 : int s = j * outstride + i;
1718 0 : dest[d] =
1719 0 : highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
1720 : }
1721 : }
1722 0 : }
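
/*
 * Editor's note: maybe_flip_strides16() (declared elsewhere, not in this
 * listing) is why the FLIPADST entries in the tables above can reuse the
 * plain ADST kernels: rather than reversing the transformed samples, the
 * destination pointer and stride are mirrored before the add loop. A
 * sketch of the vertical case, assuming the helper mirrors the 8-bit
 * maybe_flip_strides logic:
 */
#if 0  /* illustrative sketch only */
static INLINE void example_flip_dest_vertically(uint16_t **dst, int *dstride,
                                                int rows) {
  *dst += (rows - 1) * *dstride;  /* point at the bottom row */
  *dstride = -*dstride;           /* and walk upward */
}
#endif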
1723 :
1724 0 : void av1_highbd_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest8,
1725 : int stride, int tx_type, int bd) {
1726 : static const highbd_transform_2d HIGH_IHT_4x16[] = {
1727 : { aom_highbd_idct16_c, aom_highbd_idct4_c }, // DCT_DCT
1728 : { aom_highbd_iadst16_c, aom_highbd_idct4_c }, // ADST_DCT
1729 : { aom_highbd_idct16_c, aom_highbd_iadst4_c }, // DCT_ADST
1730 : { aom_highbd_iadst16_c, aom_highbd_iadst4_c }, // ADST_ADST
1731 : #if CONFIG_EXT_TX
1732 : { aom_highbd_iadst16_c, aom_highbd_idct4_c }, // FLIPADST_DCT
1733 : { aom_highbd_idct16_c, aom_highbd_iadst4_c }, // DCT_FLIPADST
1734 : { aom_highbd_iadst16_c, aom_highbd_iadst4_c }, // FLIPADST_FLIPADST
1735 : { aom_highbd_iadst16_c, aom_highbd_iadst4_c }, // ADST_FLIPADST
1736 : { aom_highbd_iadst16_c, aom_highbd_iadst4_c }, // FLIPADST_ADST
1737 : { highbd_iidtx16_c, highbd_iidtx4_c }, // IDTX
1738 : { aom_highbd_idct16_c, highbd_iidtx4_c }, // V_DCT
1739 : { highbd_iidtx16_c, aom_highbd_idct4_c }, // H_DCT
1740 : { aom_highbd_iadst16_c, highbd_iidtx4_c }, // V_ADST
1741 : { highbd_iidtx16_c, aom_highbd_iadst4_c }, // H_ADST
1742 : { aom_highbd_iadst16_c, highbd_iidtx4_c }, // V_FLIPADST
1743 : { highbd_iidtx16_c, aom_highbd_iadst4_c }, // H_FLIPADST
1744 : #endif // CONFIG_EXT_TX
1745 : };
1746 0 : const int n = 4;
1747 0 : const int n4 = 16;
1748 :
1749 0 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1750 :
1751 : int i, j;
1752 : tran_low_t out[4][16], tmp[4][16], outtmp[4];
1753 0 : tran_low_t *outp = &out[0][0];
1754 0 : int outstride = n4;
1755 :
1756 : // inverse transform row vectors, and transpose
1757 0 : for (i = 0; i < n4; ++i) {
1758 0 : HIGH_IHT_4x16[tx_type].rows(input, outtmp, bd);
1759 0 : for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
1760 0 : input += n;
1761 : }
1762 :
1763 : // inverse transform column vectors
1764 0 : for (i = 0; i < n; ++i) HIGH_IHT_4x16[tx_type].cols(tmp[i], out[i], bd);
1765 :
1766 : #if CONFIG_EXT_TX
1767 0 : maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n4, n);
1768 : #endif // CONFIG_EXT_TX
1769 :
1770 : // Sum with the destination
1771 0 : for (i = 0; i < n4; ++i) {
1772 0 : for (j = 0; j < n; ++j) {
1773 0 : int d = i * stride + j;
1774 0 : int s = j * outstride + i;
1775 0 : dest[d] =
1776 0 : highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
1777 : }
1778 : }
1779 0 : }
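
/*
 * Editor's note: contrast the copy-through row loop above with
 * av1_highbd_iht4x8_32_add_c(): sqrt(4 * 16) = 8 is already a power of
 * two, so 4x16 needs no sqrt(2) correction, whereas sqrt(4 * 8) =
 * 4 * sqrt(2) is not. Accordingly, the Sqrt2 rescale appears only in the
 * N x 2N / 2N x N functions in this file (4x8, 8x4, 8x16, 16x8, 16x32,
 * 32x16) and never in the N x 4N / 4N x N ones (4x16, 16x4, 8x32, 32x8).
 */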
1780 :
1781 0 : void av1_highbd_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest8,
1782 : int stride, int tx_type, int bd) {
1783 : static const highbd_transform_2d HIGH_IHT_16x4[] = {
1784 : { aom_highbd_idct4_c, aom_highbd_idct16_c }, // DCT_DCT
1785 : { aom_highbd_iadst4_c, aom_highbd_idct16_c }, // ADST_DCT
1786 : { aom_highbd_idct4_c, aom_highbd_iadst16_c }, // DCT_ADST
1787 : { aom_highbd_iadst4_c, aom_highbd_iadst16_c }, // ADST_ADST
1788 : #if CONFIG_EXT_TX
1789 : { aom_highbd_iadst4_c, aom_highbd_idct16_c }, // FLIPADST_DCT
1790 : { aom_highbd_idct4_c, aom_highbd_iadst16_c }, // DCT_FLIPADST
1791 : { aom_highbd_iadst4_c, aom_highbd_iadst16_c }, // FLIPADST_FLIPADST
1792 : { aom_highbd_iadst4_c, aom_highbd_iadst16_c }, // ADST_FLIPADST
1793 : { aom_highbd_iadst4_c, aom_highbd_iadst16_c }, // FLIPADST_ADST
1794 : { highbd_iidtx4_c, highbd_iidtx16_c }, // IDTX
1795 : { aom_highbd_idct4_c, highbd_iidtx16_c }, // V_DCT
1796 : { highbd_iidtx4_c, aom_highbd_idct16_c }, // H_DCT
1797 : { aom_highbd_iadst4_c, highbd_iidtx16_c }, // V_ADST
1798 : { highbd_iidtx4_c, aom_highbd_iadst16_c }, // H_ADST
1799 : { aom_highbd_iadst4_c, highbd_iidtx16_c }, // V_FLIPADST
1800 : { highbd_iidtx4_c, aom_highbd_iadst16_c }, // H_FLIPADST
1801 : #endif // CONFIG_EXT_TX
1802 : };
1803 0 : const int n = 4;
1804 0 : const int n4 = 16;
1805 :
1806 0 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1807 :
1808 : int i, j;
1809 : tran_low_t out[16][4], tmp[16][4], outtmp[16];
1810 0 : tran_low_t *outp = &out[0][0];
1811 0 : int outstride = n;
1812 :
1813 : // inverse transform row vectors, and transpose
1814 0 : for (i = 0; i < n; ++i) {
1815 0 : HIGH_IHT_16x4[tx_type].rows(input, outtmp, bd);
1816 0 : for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
1817 0 : input += n4;
1818 : }
1819 :
1820 : // inverse transform column vectors
1821 0 : for (i = 0; i < n4; ++i) {
1822 0 : HIGH_IHT_16x4[tx_type].cols(tmp[i], out[i], bd);
1823 : }
1824 :
1825 : #if CONFIG_EXT_TX
1826 0 : maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n4);
1827 : #endif // CONFIG_EXT_TX
1828 :
1829 : // Sum with the destination
1830 0 : for (i = 0; i < n; ++i) {
1831 0 : for (j = 0; j < n4; ++j) {
1832 0 : int d = i * stride + j;
1833 0 : int s = j * outstride + i;
1834 0 : dest[d] =
1835 0 : highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
1836 : }
1837 : }
1838 0 : }
1839 :
1840 0 : void av1_highbd_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest8,
1841 : int stride, int tx_type, int bd) {
1842 : static const highbd_transform_2d HIGH_IHT_8x16[] = {
1843 : { aom_highbd_idct16_c, aom_highbd_idct8_c }, // DCT_DCT
1844 : { aom_highbd_iadst16_c, aom_highbd_idct8_c }, // ADST_DCT
1845 : { aom_highbd_idct16_c, aom_highbd_iadst8_c }, // DCT_ADST
1846 : { aom_highbd_iadst16_c, aom_highbd_iadst8_c }, // ADST_ADST
1847 : #if CONFIG_EXT_TX
1848 : { aom_highbd_iadst16_c, aom_highbd_idct8_c }, // FLIPADST_DCT
1849 : { aom_highbd_idct16_c, aom_highbd_iadst8_c }, // DCT_FLIPADST
1850 : { aom_highbd_iadst16_c, aom_highbd_iadst8_c }, // FLIPADST_FLIPADST
1851 : { aom_highbd_iadst16_c, aom_highbd_iadst8_c }, // ADST_FLIPADST
1852 : { aom_highbd_iadst16_c, aom_highbd_iadst8_c }, // FLIPADST_ADST
1853 : { highbd_iidtx16_c, highbd_iidtx8_c }, // IDTX
1854 : { aom_highbd_idct16_c, highbd_iidtx8_c }, // V_DCT
1855 : { highbd_iidtx16_c, aom_highbd_idct8_c }, // H_DCT
1856 : { aom_highbd_iadst16_c, highbd_iidtx8_c }, // V_ADST
1857 : { highbd_iidtx16_c, aom_highbd_iadst8_c }, // H_ADST
1858 : { aom_highbd_iadst16_c, highbd_iidtx8_c }, // V_FLIPADST
1859 : { highbd_iidtx16_c, aom_highbd_iadst8_c }, // H_FLIPADST
1860 : #endif // CONFIG_EXT_TX
1861 : };
1862 0 : const int n = 8;
1863 0 : const int n2 = 16;
1864 :
1865 0 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1866 :
1867 : int i, j;
1868 : tran_low_t out[8][16], tmp[8][16], outtmp[8];
1869 0 : tran_low_t *outp = &out[0][0];
1870 0 : int outstride = n2;
1871 :
1872 : // inverse transform row vectors, and transpose
1873 0 : for (i = 0; i < n2; ++i) {
1874 0 : HIGH_IHT_8x16[tx_type].rows(input, outtmp, bd);
1875 0 : for (j = 0; j < n; ++j)
1876 0 : tmp[j][i] = HIGHBD_WRAPLOW(dct_const_round_shift(outtmp[j] * Sqrt2), bd);
1877 0 : input += n;
1878 : }
1879 :
1880 : // inverse transform column vectors
1881 0 : for (i = 0; i < n; ++i) {
1882 0 : HIGH_IHT_8x16[tx_type].cols(tmp[i], out[i], bd);
1883 : }
1884 :
1885 : #if CONFIG_EXT_TX
1886 0 : maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
1887 : #endif // CONFIG_EXT_TX
1888 :
1889 : // Sum with the destination
1890 0 : for (i = 0; i < n2; ++i) {
1891 0 : for (j = 0; j < n; ++j) {
1892 0 : int d = i * stride + j;
1893 0 : int s = j * outstride + i;
1894 0 : dest[d] =
1895 0 : highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
1896 : }
1897 : }
1898 0 : }
1899 :
1900 0 : void av1_highbd_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest8,
1901 : int stride, int tx_type, int bd) {
1902 : static const highbd_transform_2d HIGH_IHT_16x8[] = {
1903 : { aom_highbd_idct8_c, aom_highbd_idct16_c }, // DCT_DCT
1904 : { aom_highbd_iadst8_c, aom_highbd_idct16_c }, // ADST_DCT
1905 : { aom_highbd_idct8_c, aom_highbd_iadst16_c }, // DCT_ADST
1906 : { aom_highbd_iadst8_c, aom_highbd_iadst16_c }, // ADST_ADST
1907 : #if CONFIG_EXT_TX
1908 : { aom_highbd_iadst8_c, aom_highbd_idct16_c }, // FLIPADST_DCT
1909 : { aom_highbd_idct8_c, aom_highbd_iadst16_c }, // DCT_FLIPADST
1910 : { aom_highbd_iadst8_c, aom_highbd_iadst16_c }, // FLIPADST_FLIPADST
1911 : { aom_highbd_iadst8_c, aom_highbd_iadst16_c }, // ADST_FLIPADST
1912 : { aom_highbd_iadst8_c, aom_highbd_iadst16_c }, // FLIPADST_ADST
1913 : { highbd_iidtx8_c, highbd_iidtx16_c }, // IDTX
1914 : { aom_highbd_idct8_c, highbd_iidtx16_c }, // V_DCT
1915 : { highbd_iidtx8_c, aom_highbd_idct16_c }, // H_DCT
1916 : { aom_highbd_iadst8_c, highbd_iidtx16_c }, // V_ADST
1917 : { highbd_iidtx8_c, aom_highbd_iadst16_c }, // H_ADST
1918 : { aom_highbd_iadst8_c, highbd_iidtx16_c }, // V_FLIPADST
1919 : { highbd_iidtx8_c, aom_highbd_iadst16_c }, // H_FLIPADST
1920 : #endif // CONFIG_EXT_TX
1921 : };
1922 0 : const int n = 8;
1923 0 : const int n2 = 16;
1924 :
1925 0 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1926 :
1927 : int i, j;
1928 : tran_low_t out[16][8], tmp[16][8], outtmp[16];
1929 0 : tran_low_t *outp = &out[0][0];
1930 0 : int outstride = n;
1931 :
1932 : // inverse transform row vectors, and transpose
1933 0 : for (i = 0; i < n; ++i) {
1934 0 : HIGH_IHT_16x8[tx_type].rows(input, outtmp, bd);
1935 0 : for (j = 0; j < n2; ++j)
1936 0 : tmp[j][i] = HIGHBD_WRAPLOW(dct_const_round_shift(outtmp[j] * Sqrt2), bd);
1937 0 : input += n2;
1938 : }
1939 :
1940 : // inverse transform column vectors
1941 0 : for (i = 0; i < n2; ++i) {
1942 0 : HIGH_IHT_16x8[tx_type].cols(tmp[i], out[i], bd);
1943 : }
1944 :
1945 : #if CONFIG_EXT_TX
1946 0 : maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
1947 : #endif // CONFIG_EXT_TX
1948 :
1949 : // Sum with the destination
1950 0 : for (i = 0; i < n; ++i) {
1951 0 : for (j = 0; j < n2; ++j) {
1952 0 : int d = i * stride + j;
1953 0 : int s = j * outstride + i;
1954 0 : dest[d] =
1955 0 : highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
1956 : }
1957 : }
1958 0 : }
1959 :
1960 0 : void av1_highbd_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest8,
1961 : int stride, int tx_type, int bd) {
1962 : static const highbd_transform_2d HIGH_IHT_8x32[] = {
1963 : { aom_highbd_idct32_c, aom_highbd_idct8_c }, // DCT_DCT
1964 : { highbd_ihalfright32_c, aom_highbd_idct8_c }, // ADST_DCT
1965 : { aom_highbd_idct32_c, aom_highbd_iadst8_c }, // DCT_ADST
1966 : { highbd_ihalfright32_c, aom_highbd_iadst8_c }, // ADST_ADST
1967 : #if CONFIG_EXT_TX
1968 : { highbd_ihalfright32_c, aom_highbd_idct8_c }, // FLIPADST_DCT
1969 : { aom_highbd_idct32_c, aom_highbd_iadst8_c }, // DCT_FLIPADST
1970 : { highbd_ihalfright32_c, aom_highbd_iadst8_c }, // FLIPADST_FLIPADST
1971 : { highbd_ihalfright32_c, aom_highbd_iadst8_c }, // ADST_FLIPADST
1972 : { highbd_ihalfright32_c, aom_highbd_iadst8_c }, // FLIPADST_ADST
1973 : { highbd_iidtx32_c, highbd_iidtx8_c }, // IDTX
1974 : { aom_highbd_idct32_c, highbd_iidtx8_c }, // V_DCT
1975 : { highbd_iidtx32_c, aom_highbd_idct8_c }, // H_DCT
1976 : { highbd_ihalfright32_c, highbd_iidtx8_c }, // V_ADST
1977 : { highbd_iidtx32_c, aom_highbd_iadst8_c }, // H_ADST
1978 : { highbd_ihalfright32_c, highbd_iidtx8_c }, // V_FLIPADST
1979 : { highbd_iidtx32_c, aom_highbd_iadst8_c }, // H_FLIPADST
1980 : #endif // CONFIG_EXT_TX
1981 : };
1982 0 : const int n = 8;
1983 0 : const int n4 = 32;
1984 :
1985 0 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1986 :
1987 : int i, j;
1988 : tran_low_t out[8][32], tmp[8][32], outtmp[8];
1989 0 : tran_low_t *outp = &out[0][0];
1990 0 : int outstride = n4;
1991 :
1992 : // inverse transform row vectors, and transpose
1993 0 : for (i = 0; i < n4; ++i) {
1994 0 : HIGH_IHT_8x32[tx_type].rows(input, outtmp, bd);
1995 0 : for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
1996 0 : input += n;
1997 : }
1998 :
1999 : // inverse transform column vectors
2000 0 : for (i = 0; i < n; ++i) HIGH_IHT_8x32[tx_type].cols(tmp[i], out[i], bd);
2001 :
2002 : #if CONFIG_EXT_TX
2003 0 : maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n4, n);
2004 : #endif // CONFIG_EXT_TX
2005 :
2006 : // Sum with the destination
2007 0 : for (i = 0; i < n4; ++i) {
2008 0 : for (j = 0; j < n; ++j) {
2009 0 : int d = i * stride + j;
2010 0 : int s = j * outstride + i;
2011 0 : dest[d] =
2012 0 : highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
2013 : }
2014 : }
2015 0 : }
2016 :
2017 0 : void av1_highbd_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest8,
2018 : int stride, int tx_type, int bd) {
2019 : static const highbd_transform_2d HIGH_IHT_32x8[] = {
2020 : { aom_highbd_idct8_c, aom_highbd_idct32_c }, // DCT_DCT
2021 : { aom_highbd_iadst8_c, aom_highbd_idct32_c }, // ADST_DCT
2022 : { aom_highbd_idct8_c, highbd_ihalfright32_c }, // DCT_ADST
2023 : { aom_highbd_iadst8_c, highbd_ihalfright32_c }, // ADST_ADST
2024 : #if CONFIG_EXT_TX
2025 : { aom_highbd_iadst8_c, aom_highbd_idct32_c }, // FLIPADST_DCT
2026 : { aom_highbd_idct8_c, highbd_ihalfright32_c }, // DCT_FLIPADST
2027 : { aom_highbd_iadst8_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST
2028 : { aom_highbd_iadst8_c, highbd_ihalfright32_c }, // ADST_FLIPADST
2029 : { aom_highbd_iadst8_c, highbd_ihalfright32_c }, // FLIPADST_ADST
2030 : { highbd_iidtx8_c, highbd_iidtx32_c }, // IDTX
2031 : { aom_highbd_idct8_c, highbd_iidtx32_c }, // V_DCT
2032 : { highbd_iidtx8_c, aom_highbd_idct32_c }, // H_DCT
2033 : { aom_highbd_iadst8_c, highbd_iidtx32_c }, // V_ADST
2034 : { highbd_iidtx8_c, highbd_ihalfright32_c }, // H_ADST
2035 : { aom_highbd_iadst8_c, highbd_iidtx32_c }, // V_FLIPADST
2036 : { highbd_iidtx8_c, highbd_ihalfright32_c }, // H_FLIPADST
2037 : #endif // CONFIG_EXT_TX
2038 : };
2039 0 : const int n = 8;
2040 0 : const int n4 = 32;
2041 :
2042 0 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
2043 :
2044 : int i, j;
2045 : tran_low_t out[32][8], tmp[32][8], outtmp[32];
2046 0 : tran_low_t *outp = &out[0][0];
2047 0 : int outstride = n;
2048 :
2049 : // inverse transform row vectors, and transpose
2050 0 : for (i = 0; i < n; ++i) {
2051 0 : HIGH_IHT_32x8[tx_type].rows(input, outtmp, bd);
2052 0 : for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
2053 0 : input += n4;
2054 : }
2055 :
2056 : // inverse transform column vectors
2057 0 : for (i = 0; i < n4; ++i) HIGH_IHT_32x8[tx_type].cols(tmp[i], out[i], bd);
2058 :
2059 : #if CONFIG_EXT_TX
2060 0 : maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n4);
2061 : #endif // CONFIG_EXT_TX
2062 :
2063 : // Sum with the destination
2064 0 : for (i = 0; i < n; ++i) {
2065 0 : for (j = 0; j < n4; ++j) {
2066 0 : int d = i * stride + j;
2067 0 : int s = j * outstride + i;
2068 0 : dest[d] =
2069 0 : highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
2070 : }
2071 : }
2072 0 : }
2073 :
2074 0 : void av1_highbd_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest8,
2075 : int stride, int tx_type, int bd) {
2076 : static const highbd_transform_2d HIGH_IHT_16x32[] = {
2077 : { aom_highbd_idct32_c, aom_highbd_idct16_c }, // DCT_DCT
2078 : { highbd_ihalfright32_c, aom_highbd_idct16_c }, // ADST_DCT
2079 : { aom_highbd_idct32_c, aom_highbd_iadst16_c }, // DCT_ADST
2080 : { highbd_ihalfright32_c, aom_highbd_iadst16_c }, // ADST_ADST
2081 : #if CONFIG_EXT_TX
2082 : { highbd_ihalfright32_c, aom_highbd_idct16_c }, // FLIPADST_DCT
2083 : { aom_highbd_idct32_c, aom_highbd_iadst16_c }, // DCT_FLIPADST
2084 : { highbd_ihalfright32_c, aom_highbd_iadst16_c }, // FLIPADST_FLIPADST
2085 : { highbd_ihalfright32_c, aom_highbd_iadst16_c }, // ADST_FLIPADST
2086 : { highbd_ihalfright32_c, aom_highbd_iadst16_c }, // FLIPADST_ADST
2087 : { highbd_iidtx32_c, highbd_iidtx16_c }, // IDTX
2088 : { aom_highbd_idct32_c, highbd_iidtx16_c }, // V_DCT
2089 : { highbd_iidtx32_c, aom_highbd_idct16_c }, // H_DCT
2090 : { highbd_ihalfright32_c, highbd_iidtx16_c }, // V_ADST
2091 : { highbd_iidtx32_c, aom_highbd_iadst16_c }, // H_ADST
2092 : { highbd_ihalfright32_c, highbd_iidtx16_c }, // V_FLIPADST
2093 : { highbd_iidtx32_c, aom_highbd_iadst16_c }, // H_FLIPADST
2094 : #endif // CONFIG_EXT_TX
2095 : };
2096 0 : const int n = 16;
2097 0 : const int n2 = 32;
2098 :
2099 0 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
2100 :
2101 : int i, j;
2102 : tran_low_t out[16][32], tmp[16][32], outtmp[16];
2103 0 : tran_low_t *outp = &out[0][0];
2104 0 : int outstride = n2;
2105 :
2106 : // inverse transform row vectors, and transpose
2107 0 : for (i = 0; i < n2; ++i) {
2108 0 : HIGH_IHT_16x32[tx_type].rows(input, outtmp, bd);
2109 0 : for (j = 0; j < n; ++j)
2110 0 : tmp[j][i] = HIGHBD_WRAPLOW(dct_const_round_shift(outtmp[j] * Sqrt2), bd);
2111 0 : input += n;
2112 : }
2113 :
2114 : // inverse transform column vectors
2115 0 : for (i = 0; i < n; ++i) {
2116 0 : HIGH_IHT_16x32[tx_type].cols(tmp[i], out[i], bd);
2117 : }
2118 :
2119 : #if CONFIG_EXT_TX
2120 0 : maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
2121 : #endif // CONFIG_EXT_TX
2122 :
2123 : // Sum with the destination
2124 0 : for (i = 0; i < n2; ++i) {
2125 0 : for (j = 0; j < n; ++j) {
2126 0 : int d = i * stride + j;
2127 0 : int s = j * outstride + i;
2128 0 : dest[d] =
2129 0 : highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
2130 : }
2131 : }
2132 0 : }
2133 :
2134 0 : void av1_highbd_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest8,
2135 : int stride, int tx_type, int bd) {
2136 : static const highbd_transform_2d HIGH_IHT_32x16[] = {
2137 : { aom_highbd_idct16_c, aom_highbd_idct32_c }, // DCT_DCT
2138 : { aom_highbd_iadst16_c, aom_highbd_idct32_c }, // ADST_DCT
2139 : { aom_highbd_idct16_c, highbd_ihalfright32_c }, // DCT_ADST
2140 : { aom_highbd_iadst16_c, highbd_ihalfright32_c }, // ADST_ADST
2141 : #if CONFIG_EXT_TX
2142 : { aom_highbd_iadst16_c, aom_highbd_idct32_c }, // FLIPADST_DCT
2143 : { aom_highbd_idct16_c, highbd_ihalfright32_c }, // DCT_FLIPADST
2144 : { aom_highbd_iadst16_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST
2145 : { aom_highbd_iadst16_c, highbd_ihalfright32_c }, // ADST_FLIPADST
2146 : { aom_highbd_iadst16_c, highbd_ihalfright32_c }, // FLIPADST_ADST
2147 : { highbd_iidtx16_c, highbd_iidtx32_c }, // IDTX
2148 : { aom_highbd_idct16_c, highbd_iidtx32_c }, // V_DCT
2149 : { highbd_iidtx16_c, aom_highbd_idct32_c }, // H_DCT
2150 : { aom_highbd_iadst16_c, highbd_iidtx32_c }, // V_ADST
2151 : { highbd_iidtx16_c, highbd_ihalfright32_c }, // H_ADST
2152 : { aom_highbd_iadst16_c, highbd_iidtx32_c }, // V_FLIPADST
2153 : { highbd_iidtx16_c, highbd_ihalfright32_c }, // H_FLIPADST
2154 : #endif // CONFIG_EXT_TX
2155 : };
2156 0 : const int n = 16;
2157 0 : const int n2 = 32;
2158 :
2159 0 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
2160 :
2161 : int i, j;
2162 : tran_low_t out[32][16], tmp[32][16], outtmp[32];
2163 0 : tran_low_t *outp = &out[0][0];
2164 0 : int outstride = n;
2165 :
2166 : // inverse transform row vectors, and transpose
2167 0 : for (i = 0; i < n; ++i) {
2168 0 : HIGH_IHT_32x16[tx_type].rows(input, outtmp, bd);
2169 0 : for (j = 0; j < n2; ++j)
2170 0 : tmp[j][i] = HIGHBD_WRAPLOW(dct_const_round_shift(outtmp[j] * Sqrt2), bd);
2171 0 : input += n2;
2172 : }
2173 :
2174 : // inverse transform column vectors
2175 0 : for (i = 0; i < n2; ++i) {
2176 0 : HIGH_IHT_32x16[tx_type].cols(tmp[i], out[i], bd);
2177 : }
2178 :
2179 : #if CONFIG_EXT_TX
2180 0 : maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
2181 : #endif // CONFIG_EXT_TX
2182 :
2183 : // Sum with the destination
2184 0 : for (i = 0; i < n; ++i) {
2185 0 : for (j = 0; j < n2; ++j) {
2186 0 : int d = i * stride + j;
2187 0 : int s = j * outstride + i;
2188 0 : dest[d] =
2189 0 : highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
2190 : }
2191 : }
2192 0 : }
2193 :
2194 0 : void av1_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
2195 : int stride, int tx_type, int bd) {
2196 : static const highbd_transform_2d HIGH_IHT_8[] = {
2197 : { aom_highbd_idct8_c, aom_highbd_idct8_c }, // DCT_DCT
2198 : { aom_highbd_iadst8_c, aom_highbd_idct8_c }, // ADST_DCT
2199 : { aom_highbd_idct8_c, aom_highbd_iadst8_c }, // DCT_ADST
2200 : { aom_highbd_iadst8_c, aom_highbd_iadst8_c }, // ADST_ADST
2201 : #if CONFIG_EXT_TX
2202 : { aom_highbd_iadst8_c, aom_highbd_idct8_c }, // FLIPADST_DCT
2203 : { aom_highbd_idct8_c, aom_highbd_iadst8_c }, // DCT_FLIPADST
2204 : { aom_highbd_iadst8_c, aom_highbd_iadst8_c }, // FLIPADST_FLIPADST
2205 : { aom_highbd_iadst8_c, aom_highbd_iadst8_c }, // ADST_FLIPADST
2206 : { aom_highbd_iadst8_c, aom_highbd_iadst8_c }, // FLIPADST_ADST
2207 : { highbd_iidtx8_c, highbd_iidtx8_c }, // IDTX
2208 : { aom_highbd_idct8_c, highbd_iidtx8_c }, // V_DCT
2209 : { highbd_iidtx8_c, aom_highbd_idct8_c }, // H_DCT
2210 : { aom_highbd_iadst8_c, highbd_iidtx8_c }, // V_ADST
2211 : { highbd_iidtx8_c, aom_highbd_iadst8_c }, // H_ADST
2212 : { aom_highbd_iadst8_c, highbd_iidtx8_c }, // V_FLIPADST
2213 : { highbd_iidtx8_c, aom_highbd_iadst8_c }, // H_FLIPADST
2214 : #endif // CONFIG_EXT_TX
2215 : };
2216 :
2217 0 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
2218 :
2219 : int i, j;
2220 : tran_low_t tmp[8][8];
2221 : tran_low_t out[8][8];
2222 0 : tran_low_t *outp = &out[0][0];
2223 0 : int outstride = 8;
2224 :
2225 : // inverse transform row vectors
2226 0 : for (i = 0; i < 8; ++i) {
2227 0 : HIGH_IHT_8[tx_type].rows(input, out[i], bd);
2228 0 : input += 8;
2229 : }
2230 :
2231 : // transpose
2232 0 : for (i = 0; i < 8; i++) {
2233 0 : for (j = 0; j < 8; j++) {
2234 0 : tmp[j][i] = out[i][j];
2235 : }
2236 : }
2237 :
2238 : // inverse transform column vectors
2239 0 : for (i = 0; i < 8; ++i) {
2240 0 : HIGH_IHT_8[tx_type].cols(tmp[i], out[i], bd);
2241 : }
2242 :
2243 : #if CONFIG_EXT_TX
2244 0 : maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
2245 : #endif
2246 :
2247 : // Sum with the destination
2248 0 : for (i = 0; i < 8; ++i) {
2249 0 : for (j = 0; j < 8; ++j) {
2250 0 : int d = i * stride + j;
2251 0 : int s = j * outstride + i;
2252 0 : dest[d] =
2253 0 : highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
2254 : }
2255 : }
2256 0 : }
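
/*
 * Editor's note: the square sizes (4x4, 8x8, and the 16x16, 32x32, and
 * 64x64 below) transform every row first and then transpose in a
 * separate pass, while the rectangular sizes fuse the transpose into the
 * row loop through the small outtmp[] scratch vector. The two orderings
 * are equivalent; the fused form simply avoids a second full sweep over
 * the block.
 */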
2257 :
2258 0 : void av1_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
2259 : int stride, int tx_type, int bd) {
2260 : static const highbd_transform_2d HIGH_IHT_16[] = {
2261 : { aom_highbd_idct16_c, aom_highbd_idct16_c }, // DCT_DCT
2262 : { aom_highbd_iadst16_c, aom_highbd_idct16_c }, // ADST_DCT
2263 : { aom_highbd_idct16_c, aom_highbd_iadst16_c }, // DCT_ADST
2264 : { aom_highbd_iadst16_c, aom_highbd_iadst16_c }, // ADST_ADST
2265 : #if CONFIG_EXT_TX
2266 : { aom_highbd_iadst16_c, aom_highbd_idct16_c }, // FLIPADST_DCT
2267 : { aom_highbd_idct16_c, aom_highbd_iadst16_c }, // DCT_FLIPADST
2268 : { aom_highbd_iadst16_c, aom_highbd_iadst16_c }, // FLIPADST_FLIPADST
2269 : { aom_highbd_iadst16_c, aom_highbd_iadst16_c }, // ADST_FLIPADST
2270 : { aom_highbd_iadst16_c, aom_highbd_iadst16_c }, // FLIPADST_ADST
2271 : { highbd_iidtx16_c, highbd_iidtx16_c }, // IDTX
2272 : { aom_highbd_idct16_c, highbd_iidtx16_c }, // V_DCT
2273 : { highbd_iidtx16_c, aom_highbd_idct16_c }, // H_DCT
2274 : { aom_highbd_iadst16_c, highbd_iidtx16_c }, // V_ADST
2275 : { highbd_iidtx16_c, aom_highbd_iadst16_c }, // H_ADST
2276 : { aom_highbd_iadst16_c, highbd_iidtx16_c }, // V_FLIPADST
2277 : { highbd_iidtx16_c, aom_highbd_iadst16_c }, // H_FLIPADST
2278 : #endif // CONFIG_EXT_TX
2279 : };
2280 :
2281 0 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
2282 :
2283 : int i, j;
2284 : tran_low_t tmp[16][16];
2285 : tran_low_t out[16][16];
2286 0 : tran_low_t *outp = &out[0][0];
2287 0 : int outstride = 16;
2288 :
2289 : // inverse transform row vectors
2290 0 : for (i = 0; i < 16; ++i) {
2291 0 : HIGH_IHT_16[tx_type].rows(input, out[i], bd);
2292 0 : input += 16;
2293 : }
2294 :
2295 : // transpose
2296 0 : for (i = 0; i < 16; i++) {
2297 0 : for (j = 0; j < 16; j++) {
2298 0 : tmp[j][i] = out[i][j];
2299 : }
2300 : }
2301 :
2302 : // inverse transform column vectors
2303 0 : for (i = 0; i < 16; ++i) {
2304 0 : HIGH_IHT_16[tx_type].cols(tmp[i], out[i], bd);
2305 : }
2306 :
2307 : #if CONFIG_EXT_TX
2308 0 : maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
2309 : #endif
2310 :
2311 : // Sum with the destination
2312 0 : for (i = 0; i < 16; ++i) {
2313 0 : for (j = 0; j < 16; ++j) {
2314 0 : int d = i * stride + j;
2315 0 : int s = j * outstride + i;
2316 0 : dest[d] =
2317 0 : highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
2318 : }
2319 : }
2320 0 : }
2321 :
2322 : #if CONFIG_EXT_TX
2323 0 : static void highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
2324 : int stride, int tx_type, int bd) {
2325 : static const highbd_transform_2d HIGH_IHT_32[] = {
2326 : { aom_highbd_idct32_c, aom_highbd_idct32_c }, // DCT_DCT
2327 : { highbd_ihalfright32_c, aom_highbd_idct32_c }, // ADST_DCT
2328 : { aom_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_ADST
2329 : { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_ADST
2330 : { highbd_ihalfright32_c, aom_highbd_idct32_c }, // FLIPADST_DCT
2331 : { aom_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_FLIPADST
2332 : { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST
2333 : { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST
2334 : { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST
2335 : { highbd_iidtx32_c, highbd_iidtx32_c }, // IDTX
2336 : { aom_highbd_idct32_c, highbd_iidtx32_c }, // V_DCT
2337 : { highbd_iidtx32_c, aom_highbd_idct32_c }, // H_DCT
2338 : { highbd_ihalfright32_c, highbd_iidtx32_c }, // V_ADST
2339 : { highbd_iidtx32_c, highbd_ihalfright32_c }, // H_ADST
2340 : { highbd_ihalfright32_c, highbd_iidtx32_c }, // V_FLIPADST
2341 : { highbd_iidtx32_c, highbd_ihalfright32_c }, // H_FLIPADST
2342 : };
2343 :
2344 0 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
2345 :
2346 : int i, j;
2347 : tran_low_t tmp[32][32];
2348 : tran_low_t out[32][32];
2349 0 : tran_low_t *outp = &out[0][0];
2350 0 : int outstride = 32;
2351 :
2352 : // inverse transform row vectors
2353 0 : for (i = 0; i < 32; ++i) {
2354 0 : HIGH_IHT_32[tx_type].rows(input, out[i], bd);
2355 0 : input += 32;
2356 : }
2357 :
2358 : // transpose
2359 0 : for (i = 0; i < 32; i++) {
2360 0 : for (j = 0; j < 32; j++) {
2361 0 : tmp[j][i] = out[i][j];
2362 : }
2363 : }
2364 :
2365 : // inverse transform column vectors
2366 0 : for (i = 0; i < 32; ++i) {
2367 0 : HIGH_IHT_32[tx_type].cols(tmp[i], out[i], bd);
2368 : }
2369 :
2370 0 : maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 32, 32);
2371 :
2372 : // Sum with the destination
2373 0 : for (i = 0; i < 32; ++i) {
2374 0 : for (j = 0; j < 32; ++j) {
2375 0 : int d = i * stride + j;
2376 0 : int s = j * outstride + i;
2377 0 : dest[d] =
2378 0 : highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
2379 : }
2380 : }
2381 0 : }
2382 : #endif // CONFIG_EXT_TX
2383 :
2384 : #if CONFIG_TX64X64
2385 : static void highbd_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest8,
2386 : int stride, int tx_type, int bd) {
2387 : static const highbd_transform_2d HIGH_IHT_64[] = {
2388 : { highbd_idct64_col_c, highbd_idct64_row_c }, // DCT_DCT
2389 : { highbd_ihalfright64_c, highbd_idct64_row_c }, // ADST_DCT
2390 : { highbd_idct64_col_c, highbd_ihalfright64_c }, // DCT_ADST
2391 : { highbd_ihalfright64_c, highbd_ihalfright64_c }, // ADST_ADST
2392 : #if CONFIG_EXT_TX
2393 : { highbd_ihalfright64_c, highbd_idct64_row_c }, // FLIPADST_DCT
2394 : { highbd_idct64_col_c, highbd_ihalfright64_c }, // DCT_FLIPADST
2395 : { highbd_ihalfright64_c, highbd_ihalfright64_c }, // FLIPADST_FLIPADST
2396 : { highbd_ihalfright64_c, highbd_ihalfright64_c }, // ADST_FLIPADST
2397 : { highbd_ihalfright64_c, highbd_ihalfright64_c }, // FLIPADST_ADST
2398 : { highbd_iidtx64_c, highbd_iidtx64_c }, // IDTX
2399 : { highbd_idct64_col_c, highbd_iidtx64_c }, // V_DCT
2400 : { highbd_iidtx64_c, highbd_idct64_row_c }, // H_DCT
2401 : { highbd_ihalfright64_c, highbd_iidtx64_c }, // V_ADST
2402 : { highbd_iidtx64_c, highbd_ihalfright64_c }, // H_ADST
2403 : { highbd_ihalfright64_c, highbd_iidtx64_c }, // V_FLIPADST
2404 : { highbd_iidtx64_c, highbd_ihalfright64_c }, // H_FLIPADST
2405 : #endif // CONFIG_EXT_TX
2406 : };
2407 :
2408 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
2409 :
2410 : int i, j;
2411 : tran_low_t tmp[64][64];
2412 : tran_low_t out[64][64];
2413 : tran_low_t *outp = &out[0][0];
2414 : int outstride = 64;
2415 :
2416 : // inverse transform row vectors
2417 : for (i = 0; i < 64; ++i) {
2418 : HIGH_IHT_64[tx_type].rows(input, out[i], bd);
2419 : for (j = 0; j < 64; ++j) out[i][j] = ROUND_POWER_OF_TWO(out[i][j], 1);
2420 : input += 64;
2421 : }
2422 :
2423 : // transpose
2424 : for (i = 0; i < 64; i++) {
2425 : for (j = 0; j < 64; j++) {
2426 : tmp[j][i] = out[i][j];
2427 : }
2428 : }
2429 :
2430 : // inverse transform column vectors
2431 : for (i = 0; i < 64; ++i) {
2432 : HIGH_IHT_64[tx_type].cols(tmp[i], out[i], bd);
2433 : }
2434 :
2435 : #if CONFIG_EXT_TX
2436 : maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 64, 64);
2437 : #endif // CONFIG_EXT_TX
2438 :
2439 : // Sum with the destination
2440 : for (i = 0; i < 64; ++i) {
2441 : for (j = 0; j < 64; ++j) {
2442 : int d = i * stride + j;
2443 : int s = j * outstride + i;
2444 : dest[d] =
2445 : highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
2446 : }
2447 : }
2448 : }
2449 : #endif // CONFIG_TX64X64
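
/*
 * Editor's note: the final ROUND_POWER_OF_TWO() in each add loop undoes
 * the accumulated transform gain, and tracks block size: shift 4 for
 * 4x4; shift 5 for 4x8, 8x4, 4x16, 16x4, and 8x8; shift 6 for everything
 * larger. The 64x64 path reaches its total of 6 differently, rounding
 * each row output by 1 bit up front and then shifting the final sum by 5.
 */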
2450 :
2451 : // 4x4 idct/iwht add wrappers with an eob-aware DC-only fast path
2452 0 : void av1_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
2453 : int eob, int bd) {
2454 0 : if (eob > 1)
2455 0 : aom_highbd_idct4x4_16_add(input, dest, stride, bd);
2456 : else
2457 0 : aom_highbd_idct4x4_1_add(input, dest, stride, bd);
2458 0 : }
2459 :
2460 0 : void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
2461 : int eob, int bd) {
2462 0 : if (eob > 1)
2463 0 : aom_highbd_iwht4x4_16_add(input, dest, stride, bd);
2464 : else
2465 0 : aom_highbd_iwht4x4_1_add(input, dest, stride, bd);
2466 0 : }
2467 :
2468 : #if CONFIG_CHROMA_2X2
2469 : static void highbd_inv_txfm_add_2x2(const tran_low_t *input, uint8_t *dest,
2470 : int stride, int eob, int bd,
2471 : TX_TYPE tx_type, int lossless) {
2472 : tran_high_t a1 = input[0] >> UNIT_QUANT_SHIFT;
2473 : tran_high_t b1 = input[1] >> UNIT_QUANT_SHIFT;
2474 : tran_high_t c1 = input[2] >> UNIT_QUANT_SHIFT;
2475 : tran_high_t d1 = input[3] >> UNIT_QUANT_SHIFT;
2476 :
2477 : tran_high_t a2 = a1 + c1;
2478 : tran_high_t b2 = b1 + d1;
2479 : tran_high_t c2 = a1 - c1;
2480 : tran_high_t d2 = b1 - d1;
2481 :
2482 : uint16_t *dst = CONVERT_TO_SHORTPTR(dest);
2483 :
2484 : (void)tx_type;
2485 : (void)lossless;
2486 : (void)eob;
2487 :
2488 : a1 = (a2 + b2) >> 2;
2489 : b1 = (a2 - b2) >> 2;
2490 : c1 = (c2 + d2) >> 2;
2491 : d1 = (c2 - d2) >> 2;
2492 :
2493 : dst[0] = highbd_clip_pixel_add(dst[0], a1, bd);
2494 : dst[1] = highbd_clip_pixel_add(dst[1], b1, bd);
2495 : dst[stride] = highbd_clip_pixel_add(dst[stride], c1, bd);
2496 : dst[stride + 1] = highbd_clip_pixel_add(dst[stride + 1], d1, bd);
2497 : }
2498 : #endif  // CONFIG_CHROMA_2X2
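
/*
 * Editor's note: the 2x2 path above is a plain 2-D Hadamard butterfly.
 * With input block [a1 b1; c1 d1] and H = [1 1; 1 -1], it computes
 * (H * X * H) >> 2; e.g. the top-left output adds
 * (a1 + b1 + c1 + d1) >> 2 into the destination, so the >> 2 exactly
 * undoes the gain of 4 picked up by the matching forward butterflies
 * (up to shift rounding).
 */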
2499 :
2500 0 : void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
2501 : int stride, int eob, int bd, TX_TYPE tx_type,
2502 : int lossless) {
2503 0 : if (lossless) {
2504 0 : assert(tx_type == DCT_DCT);
2505 0 : av1_highbd_iwht4x4_add(input, dest, stride, eob, bd);
2506 0 : return;
2507 : }
2508 :
2509 0 : switch (tx_type) {
2510 : case DCT_DCT:
2511 : case ADST_DCT:
2512 : case DCT_ADST:
2513 : case ADST_ADST:
2514 : #if CONFIG_EXT_TX
2515 : case FLIPADST_DCT:
2516 : case DCT_FLIPADST:
2517 : case FLIPADST_FLIPADST:
2518 : case ADST_FLIPADST:
2519 : case FLIPADST_ADST:
2520 : #endif // CONFIG_EXT_TX
2521 0 : av1_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
2522 : bd);
2523 0 : break;
2524 : #if CONFIG_EXT_TX
2525 : case V_DCT:
2526 : case H_DCT:
2527 : case V_ADST:
2528 : case H_ADST:
2529 : case V_FLIPADST:
2530 : case H_FLIPADST:
2531 :       // Use the C version: these 1-D hybrid (V_*/H_*) types exist
2531 :       // only as C code
2532 0 : av1_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
2533 0 : break;
2534 : case IDTX:
2535 0 : highbd_inv_idtx_add_c(input, dest, stride, 4, tx_type, bd);
2536 0 : break;
2537 : #endif // CONFIG_EXT_TX
2538 0 : default: assert(0); break;
2539 : }
2540 : }
2541 :
2542 0 : void av1_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
2543 : int stride, int eob, int bd, TX_TYPE tx_type) {
2544 : (void)eob;
2545 0 : av1_highbd_iht4x8_32_add_c(input, dest, stride, tx_type, bd);
2546 0 : }
2547 :
2548 0 : void av1_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
2549 : int stride, int eob, int bd, TX_TYPE tx_type) {
2550 : (void)eob;
2551 0 : av1_highbd_iht8x4_32_add_c(input, dest, stride, tx_type, bd);
2552 0 : }
2553 :
2554 0 : void av1_highbd_inv_txfm_add_4x16(const tran_low_t *input, uint8_t *dest,
2555 : int stride, int eob, int bd,
2556 : TX_TYPE tx_type) {
2557 : (void)eob;
2558 0 : av1_highbd_iht4x16_64_add_c(input, dest, stride, tx_type, bd);
2559 0 : }
2560 :
2561 0 : void av1_highbd_inv_txfm_add_16x4(const tran_low_t *input, uint8_t *dest,
2562 : int stride, int eob, int bd,
2563 : TX_TYPE tx_type) {
2564 : (void)eob;
2565 0 : av1_highbd_iht16x4_64_add_c(input, dest, stride, tx_type, bd);
2566 0 : }
2567 :
2568 0 : static void highbd_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
2569 : int stride, int eob, int bd,
2570 : TX_TYPE tx_type) {
2571 : (void)eob;
2572 0 : av1_highbd_iht8x16_128_add_c(input, dest, stride, tx_type, bd);
2573 0 : }
2574 :
2575 0 : static void highbd_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
2576 : int stride, int eob, int bd,
2577 : TX_TYPE tx_type) {
2578 : (void)eob;
2579 0 : av1_highbd_iht16x8_128_add_c(input, dest, stride, tx_type, bd);
2580 0 : }
2581 :
2582 0 : void av1_highbd_inv_txfm_add_8x32(const tran_low_t *input, uint8_t *dest,
2583 : int stride, int eob, int bd,
2584 : TX_TYPE tx_type) {
2585 : (void)eob;
2586 0 : av1_highbd_iht8x32_256_add_c(input, dest, stride, tx_type, bd);
2587 0 : }
2588 :
2589 0 : void av1_highbd_inv_txfm_add_32x8(const tran_low_t *input, uint8_t *dest,
2590 : int stride, int eob, int bd,
2591 : TX_TYPE tx_type) {
2592 : (void)eob;
2593 0 : av1_highbd_iht32x8_256_add_c(input, dest, stride, tx_type, bd);
2594 0 : }
2595 :
2596 0 : static void highbd_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
2597 : int stride, int eob, int bd,
2598 : TX_TYPE tx_type) {
2599 : (void)eob;
2600 0 : av1_highbd_iht16x32_512_add_c(input, dest, stride, tx_type, bd);
2601 0 : }
2602 :
2603 0 : static void highbd_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
2604 : int stride, int eob, int bd,
2605 : TX_TYPE tx_type) {
2606 : (void)eob;
2607 0 : av1_highbd_iht32x16_512_add_c(input, dest, stride, tx_type, bd);
2608 0 : }
2609 :
2610 0 : static void highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
2611 : int stride, int eob, int bd,
2612 : TX_TYPE tx_type) {
2613 : (void)eob;
2614 0 : switch (tx_type) {
2615 : case DCT_DCT:
2616 : case ADST_DCT:
2617 : case DCT_ADST:
2618 : case ADST_ADST:
2619 : #if CONFIG_EXT_TX
2620 : case FLIPADST_DCT:
2621 : case DCT_FLIPADST:
2622 : case FLIPADST_FLIPADST:
2623 : case ADST_FLIPADST:
2624 : case FLIPADST_ADST:
2625 : #endif // CONFIG_EXT_TX
2626 0 : av1_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
2627 : bd);
2628 0 : break;
2629 : #if CONFIG_EXT_TX
2630 : case V_DCT:
2631 : case H_DCT:
2632 : case V_ADST:
2633 : case H_ADST:
2634 : case V_FLIPADST:
2635 : case H_FLIPADST:
2636 :       // Use the C version: these 1-D hybrid (V_*/H_*) types exist
2636 :       // only as C code
2637 0 : av1_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
2638 0 : break;
2639 : case IDTX:
2640 0 : highbd_inv_idtx_add_c(input, dest, stride, 8, tx_type, bd);
2641 0 : break;
2642 : #endif // CONFIG_EXT_TX
2643 0 : default: assert(0); break;
2644 : }
2645 0 : }
2646 :
2647 0 : static void highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
2648 : int stride, int eob, int bd,
2649 : TX_TYPE tx_type) {
2650 : (void)eob;
2651 0 : switch (tx_type) {
2652 : case DCT_DCT:
2653 : case ADST_DCT:
2654 : case DCT_ADST:
2655 : case ADST_ADST:
2656 : #if CONFIG_EXT_TX
2657 : case FLIPADST_DCT:
2658 : case DCT_FLIPADST:
2659 : case FLIPADST_FLIPADST:
2660 : case ADST_FLIPADST:
2661 : case FLIPADST_ADST:
2662 : #endif // CONFIG_EXT_TX
2663 0 : av1_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
2664 : tx_type, bd);
2665 0 : break;
2666 : #if CONFIG_EXT_TX
2667 : case V_DCT:
2668 : case H_DCT:
2669 : case V_ADST:
2670 : case H_ADST:
2671 : case V_FLIPADST:
2672 : case H_FLIPADST:
2673 :       // Use the C version: these 1-D hybrid (V_*/H_*) types exist
2673 :       // only as C code
2674 0 : av1_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
2675 0 : break;
2676 : case IDTX:
2677 0 : highbd_inv_idtx_add_c(input, dest, stride, 16, tx_type, bd);
2678 0 : break;
2679 : #endif // CONFIG_EXT_TX
2680 0 : default: assert(0); break;
2681 : }
2682 0 : }
2683 :
2684 0 : static void highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
2685 : int stride, int eob, int bd,
2686 : TX_TYPE tx_type) {
2687 : (void)eob;
2688 0 : switch (tx_type) {
2689 : case DCT_DCT:
2690 0 : av1_inv_txfm2d_add_32x32(input, CONVERT_TO_SHORTPTR(dest), stride,
2691 : DCT_DCT, bd);
2692 0 : break;
2693 : #if CONFIG_EXT_TX
2694 : case ADST_DCT:
2695 : case DCT_ADST:
2696 : case ADST_ADST:
2697 : case FLIPADST_DCT:
2698 : case DCT_FLIPADST:
2699 : case FLIPADST_FLIPADST:
2700 : case ADST_FLIPADST:
2701 : case FLIPADST_ADST:
2702 : case V_DCT:
2703 : case H_DCT:
2704 : case V_ADST:
2705 : case H_ADST:
2706 : case V_FLIPADST:
2707 : case H_FLIPADST:
2708 0 : highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
2709 0 : break;
2710 : case IDTX:
2711 0 : highbd_inv_idtx_add_c(input, dest, stride, 32, tx_type, bd);
2712 0 : break;
2713 : #endif // CONFIG_EXT_TX
2714 0 : default: assert(0); break;
2715 : }
2716 0 : }
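
/*
 * Editor's note: unlike the 4x4/8x8/16x16 dispatchers above, which route
 * all nine DCT/ADST/FLIPADST combinations through av1_inv_txfm2d_add_*,
 * the 32x32 switch has a 2-D implementation for DCT_DCT only; every
 * other extended type falls back to the C hybrid or the identity add.
 */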
2717 :
2718 : #if CONFIG_TX64X64
2719 : static void highbd_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
2720 : int stride, int eob, int bd,
2721 : TX_TYPE tx_type) {
2722 : (void)eob;
2723 : switch (tx_type) {
2724 : case DCT_DCT:
2725 : av1_inv_txfm2d_add_64x64(input, CONVERT_TO_SHORTPTR(dest), stride,
2726 : DCT_DCT, bd);
2727 : break;
2728 : #if CONFIG_EXT_TX
2729 : case ADST_DCT:
2730 : case DCT_ADST:
2731 : case ADST_ADST:
2732 : case FLIPADST_DCT:
2733 : case DCT_FLIPADST:
2734 : case FLIPADST_FLIPADST:
2735 : case ADST_FLIPADST:
2736 : case FLIPADST_ADST:
2737 : case V_DCT:
2738 : case H_DCT:
2739 : case V_ADST:
2740 : case H_ADST:
2741 : case V_FLIPADST:
2742 : case H_FLIPADST:
2743 : highbd_iht64x64_4096_add_c(input, dest, stride, tx_type, bd);
2744 : break;
2745 : case IDTX:
2746 : highbd_inv_idtx_add_c(input, dest, stride, 64, tx_type, bd);
2747 : break;
2748 : #endif // CONFIG_EXT_TX
2749 : default: assert(0); break;
2750 : }
2751 : }
2752 : #endif // CONFIG_TX64X64
2753 : #endif // CONFIG_HIGHBITDEPTH
2754 :
2755 0 : void av1_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
2756 : INV_TXFM_PARAM *param) {
2757 0 : const TX_TYPE tx_type = param->tx_type;
2758 0 : const TX_SIZE tx_size = param->tx_size;
2759 0 : const int eob = param->eob;
2760 0 : const int lossless = param->lossless;
2761 :
2762 0 : switch (tx_size) {
2763 : #if CONFIG_TX64X64
2764 : case TX_64X64: inv_txfm_add_64x64(input, dest, stride, param); break;
2765 : #endif // CONFIG_TX64X64
2766 0 : case TX_32X32: inv_txfm_add_32x32(input, dest, stride, param); break;
2767 0 : case TX_16X16: inv_txfm_add_16x16(input, dest, stride, param); break;
2768 0 : case TX_8X8: inv_txfm_add_8x8(input, dest, stride, param); break;
2769 0 : case TX_4X8: inv_txfm_add_4x8(input, dest, stride, eob, tx_type); break;
2770 0 : case TX_8X4: inv_txfm_add_8x4(input, dest, stride, eob, tx_type); break;
2771 0 : case TX_8X16: inv_txfm_add_8x16(input, dest, stride, eob, tx_type); break;
2772 0 : case TX_16X8: inv_txfm_add_16x8(input, dest, stride, eob, tx_type); break;
2773 0 : case TX_16X32: inv_txfm_add_16x32(input, dest, stride, eob, tx_type); break;
2774 0 : case TX_32X16: inv_txfm_add_32x16(input, dest, stride, eob, tx_type); break;
2775 : case TX_4X4:
2776 : // this is like av1_short_idct4x4 but has a special case around eob<=1
2777 : // which is significant (not just an optimization) for the lossless
2778 : // case.
2779 0 : inv_txfm_add_4x4(input, dest, stride, eob, tx_type, lossless);
2780 0 : break;
2781 : #if CONFIG_CHROMA_2X2
2782 : case TX_2X2:
2783 : inv_txfm_add_2x2(input, dest, stride, eob, tx_type, lossless);
2784 : break;
2785 : #endif
2786 : #if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
2787 : case TX_32X8: inv_txfm_add_32x8(input, dest, stride, eob, tx_type); break;
2788 : case TX_8X32: inv_txfm_add_8x32(input, dest, stride, eob, tx_type); break;
2789 : case TX_16X4: inv_txfm_add_16x4(input, dest, stride, eob, tx_type); break;
2790 : case TX_4X16: inv_txfm_add_4x16(input, dest, stride, eob, tx_type); break;
2791 : #endif
2792 0 : default: assert(0 && "Invalid transform size"); break;
2793 : }
2794 0 : }
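
/*
 * Editor's note: a minimal caller sketch for the dispatcher above, with
 * the parameter block filled in by hand (decode paths normally populate
 * it via init_inv_txfm_param() below); all values here are illustrative:
 */
#if 0  /* illustrative sketch only */
static void example_inv_txfm_add_8x8(const tran_low_t *dqcoeff, uint8_t *dst,
                                     int stride) {
  INV_TXFM_PARAM param = { 0 };
  param.tx_type = DCT_DCT;
  param.tx_size = TX_8X8;
  param.eob = 1;       /* DC-only block */
  param.lossless = 0;
  av1_inv_txfm_add(dqcoeff, dst, stride, &param);
}
#endif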
2795 :
2796 0 : static void init_inv_txfm_param(const MACROBLOCKD *xd, TX_SIZE tx_size,
2797 : TX_TYPE tx_type, int eob, INV_TXFM_PARAM *inv) {
2798 0 : inv->tx_type = tx_type;
2799 0 : inv->tx_size = tx_size;
2800 0 : inv->eob = eob;
2801 0 : inv->lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
2802 : #if CONFIG_HIGHBITDEPTH
2803 0 : inv->bd = xd->bd;
2804 : #endif
2805 : #if CONFIG_ADAPT_SCAN
2806 : inv->eob_threshold =
2807 : (const int16_t *)&xd->eob_threshold_md[tx_size][tx_type][0];
2808 : #endif
2809 0 : }
2810 :
2811 0 : void av1_inverse_transform_block(const MACROBLOCKD *xd,
2812 : const tran_low_t *dqcoeff, TX_TYPE tx_type,
2813 : TX_SIZE tx_size, uint8_t *dst, int stride,
2814 : int eob) {
2815 0 : if (!eob) return;
2816 : #if CONFIG_PVQ
2817 : const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
2818 : const int txb_width = block_size_wide[tx_bsize];
2819 : const int txb_height = block_size_high[tx_bsize];
2820 : int r, c;
2821 : #if CONFIG_HIGHBITDEPTH
2822 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2823 : for (r = 0; r < txb_height; r++)
2824 : for (c = 0; c < txb_width; c++)
2825 : CONVERT_TO_SHORTPTR(dst)[r * stride + c] = 0;
2826 : } else {
2827 : #endif // CONFIG_HIGHBITDEPTH
2828 : for (r = 0; r < txb_height; r++)
2829 : for (c = 0; c < txb_width; c++) dst[r * stride + c] = 0;
2830 : #if CONFIG_HIGHBITDEPTH
2831 : }
2832 : #endif // CONFIG_HIGHBITDEPTH
2833 : #endif // CONFIG_PVQ
2834 : INV_TXFM_PARAM inv_txfm_param;
2835 0 : init_inv_txfm_param(xd, tx_size, tx_type, eob, &inv_txfm_param);
2836 :
2837 : #if CONFIG_HIGHBITDEPTH
2838 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2839 0 : av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &inv_txfm_param);
2840 : } else {
2841 : #endif // CONFIG_HIGHBITDEPTH
2842 0 : av1_inv_txfm_add(dqcoeff, dst, stride, &inv_txfm_param);
2843 : #if CONFIG_HIGHBITDEPTH
2844 : }
2845 : #endif // CONFIG_HIGHBITDEPTH
2846 : }
2847 :
2848 0 : void av1_inverse_transform_block_facade(MACROBLOCKD *xd, int plane, int block,
2849 : int blk_row, int blk_col, int eob) {
2850 0 : struct macroblockd_plane *const pd = &xd->plane[plane];
2851 0 : tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
2852 0 : const PLANE_TYPE plane_type = get_plane_type(plane);
2853 0 : const TX_SIZE tx_size = get_tx_size(plane, xd);
2854 0 : const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
2855 0 : const int dst_stride = pd->dst.stride;
2856 0 : uint8_t *dst =
2857 0 : &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
2858 0 : av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, dst, dst_stride,
2859 : eob);
2860 0 : }
2861 :
2862 : #if CONFIG_HIGHBITDEPTH
2863 0 : void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest,
2864 :                              int stride, INV_TXFM_PARAM *inv_txfm_param) {
2865 0 : const TX_TYPE tx_type = inv_txfm_param->tx_type;
2866 0 : const TX_SIZE tx_size = inv_txfm_param->tx_size;
2867 0 : const int eob = inv_txfm_param->eob;
2868 0 : const int bd = inv_txfm_param->bd;
2869 0 : const int lossless = inv_txfm_param->lossless;
2870 :
2871 0 : switch (tx_size) {
2872 : #if CONFIG_TX64X64
2873 : case TX_64X64:
2874 : highbd_inv_txfm_add_64x64(input, dest, stride, eob, bd, tx_type);
2875 : break;
2876 : #endif // CONFIG_TX64X64
2877 : case TX_32X32:
2878 0 : highbd_inv_txfm_add_32x32(input, dest, stride, eob, bd, tx_type);
2879 0 : break;
2880 : case TX_16X16:
2881 0 : highbd_inv_txfm_add_16x16(input, dest, stride, eob, bd, tx_type);
2882 0 : break;
2883 : case TX_8X8:
2884 0 : highbd_inv_txfm_add_8x8(input, dest, stride, eob, bd, tx_type);
2885 0 : break;
2886 : case TX_4X8:
2887 0 : av1_highbd_inv_txfm_add_4x8(input, dest, stride, eob, bd, tx_type);
2888 0 : break;
2889 : case TX_8X4:
2890 0 : av1_highbd_inv_txfm_add_8x4(input, dest, stride, eob, bd, tx_type);
2891 0 : break;
2892 : case TX_8X16:
2893 0 : highbd_inv_txfm_add_8x16(input, dest, stride, eob, bd, tx_type);
2894 0 : break;
2895 : case TX_16X8:
2896 0 : highbd_inv_txfm_add_16x8(input, dest, stride, eob, bd, tx_type);
2897 0 : break;
2898 : case TX_16X32:
2899 0 : highbd_inv_txfm_add_16x32(input, dest, stride, eob, bd, tx_type);
2900 0 : break;
2901 : case TX_32X16:
2902 0 : highbd_inv_txfm_add_32x16(input, dest, stride, eob, bd, tx_type);
2903 0 : break;
2904 : case TX_4X4:
2905 : // this is like av1_short_idct4x4 but has a special case around eob<=1
2906 : // which is significant (not just an optimization) for the lossless
2907 : // case.
2908 0 : av1_highbd_inv_txfm_add_4x4(input, dest, stride, eob, bd, tx_type,
2909 : lossless);
2910 0 : break;
2911 : #if CONFIG_CHROMA_2X2
2912 : case TX_2X2:
2913 : highbd_inv_txfm_add_2x2(input, dest, stride, eob, bd, tx_type, lossless);
2914 : break;
2915 : #endif
2916 0 : default: assert(0 && "Invalid transform size"); break;
2917 : }
2918 0 : }
2919 : #endif // CONFIG_HIGHBITDEPTH
2920 :
2921 : #if CONFIG_DPCM_INTRA
2922 : void av1_dpcm_inv_txfm_add_4_c(const tran_low_t *input, int stride,
2923 : TX_TYPE_1D tx_type, uint8_t *dest) {
2924 : assert(tx_type < TX_TYPES_1D);
2925 : static const transform_1d IHT[] = { aom_idct4_c, aom_iadst4_c, aom_iadst4_c,
2926 : iidtx4_c };
2927 : const transform_1d inv_tx = IHT[tx_type];
2928 : tran_low_t out[4];
2929 : inv_tx(input, out);
2930 : for (int i = 0; i < 4; ++i) {
2931 : out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
2932 : dest[i * stride] =
2933 : clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 4));
2934 : }
2935 : }
2936 :
2937 : void av1_dpcm_inv_txfm_add_8_c(const tran_low_t *input, int stride,
2938 : TX_TYPE_1D tx_type, uint8_t *dest) {
2939 : assert(tx_type < TX_TYPES_1D);
2940 : static const transform_1d IHT[] = { aom_idct8_c, aom_iadst8_c, aom_iadst8_c,
2941 : iidtx8_c };
2942 : const transform_1d inv_tx = IHT[tx_type];
2943 : tran_low_t out[8];
2944 : inv_tx(input, out);
2945 : for (int i = 0; i < 8; ++i) {
2946 : dest[i * stride] =
2947 : clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 4));
2948 : }
2949 : }
2950 :
2951 : void av1_dpcm_inv_txfm_add_16_c(const tran_low_t *input, int stride,
2952 : TX_TYPE_1D tx_type, uint8_t *dest) {
2953 : assert(tx_type < TX_TYPES_1D);
2954 : static const transform_1d IHT[] = { aom_idct16_c, aom_iadst16_c,
2955 : aom_iadst16_c, iidtx16_c };
2956 : const transform_1d inv_tx = IHT[tx_type];
2957 : tran_low_t out[16];
2958 : inv_tx(input, out);
2959 : for (int i = 0; i < 16; ++i) {
2960 : out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
2961 : dest[i * stride] =
2962 : clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 5));
2963 : }
2964 : }
2965 :
2966 : void av1_dpcm_inv_txfm_add_32_c(const tran_low_t *input, int stride,
2967 : TX_TYPE_1D tx_type, uint8_t *dest) {
2968 : assert(tx_type < TX_TYPES_1D);
2969 : static const transform_1d IHT[] = { aom_idct32_c, ihalfright32_c,
2970 : ihalfright32_c, iidtx32_c };
2971 : const transform_1d inv_tx = IHT[tx_type];
2972 : tran_low_t out[32];
2973 : inv_tx(input, out);
2974 : for (int i = 0; i < 32; ++i) {
2975 : dest[i * stride] =
2976 : clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 4));
2977 : }
2978 : }
2979 :
2980 : dpcm_inv_txfm_add_func av1_get_dpcm_inv_txfm_add_func(int tx_length) {
2981 : switch (tx_length) {
2982 : case 4: return av1_dpcm_inv_txfm_add_4_c;
2983 : case 8: return av1_dpcm_inv_txfm_add_8_c;
2984 : case 16: return av1_dpcm_inv_txfm_add_16_c;
2985 : case 32:
2986 : return av1_dpcm_inv_txfm_add_32_c;
2987 : // TODO(huisu): add support for TX_64X64.
2988 : default: assert(0); return NULL;
2989 : }
2990 : }
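
/*
 * Editor's note: usage sketch for the selector above -- fetch the 1-D
 * inverse for a given edge length and run it down one lane of a
 * DPCM-coded block. Assumes DCT_1D comes from the TX_TYPE_1D enum in
 * enums.h; the buffers are illustrative:
 */
#if 0  /* illustrative sketch only */
static void example_dpcm_inv_column(const tran_low_t *coeffs, uint8_t *dst,
                                    int stride) {
  dpcm_inv_txfm_add_func inv_tx = av1_get_dpcm_inv_txfm_add_func(4);
  if (inv_tx) inv_tx(coeffs, stride, DCT_1D, dst);
}
#endif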
2991 :
2992 : #if CONFIG_HIGHBITDEPTH
2993 : void av1_hbd_dpcm_inv_txfm_add_4_c(const tran_low_t *input, int stride,
2994 : TX_TYPE_1D tx_type, int bd, uint16_t *dest) {
2995 : assert(tx_type < TX_TYPES_1D);
2996 : static const highbd_transform_1d IHT[] = { aom_highbd_idct4_c,
2997 : aom_highbd_iadst4_c,
2998 : aom_highbd_iadst4_c,
2999 : highbd_iidtx4_c };
3000 : const highbd_transform_1d inv_tx = IHT[tx_type];
3001 : tran_low_t out[4];
3002 : inv_tx(input, out, bd);
3003 : for (int i = 0; i < 4; ++i) {
3004 : out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
3005 : dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
3006 : ROUND_POWER_OF_TWO(out[i], 4), bd);
3007 : }
3008 : }
3009 :
3010 : void av1_hbd_dpcm_inv_txfm_add_8_c(const tran_low_t *input, int stride,
3011 : TX_TYPE_1D tx_type, int bd, uint16_t *dest) {
3012 : static const highbd_transform_1d IHT[] = { aom_highbd_idct8_c,
3013 : aom_highbd_iadst8_c,
3014 : aom_highbd_iadst8_c,
3015 : highbd_iidtx8_c };
3016 : assert(tx_type < TX_TYPES_1D);
3017 : const highbd_transform_1d inv_tx = IHT[tx_type];
3018 : tran_low_t out[8];
3019 : inv_tx(input, out, bd);
3020 : for (int i = 0; i < 8; ++i) {
3021 : dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
3022 : ROUND_POWER_OF_TWO(out[i], 4), bd);
3023 : }
3024 : }
3025 :
3026 : void av1_hbd_dpcm_inv_txfm_add_16_c(const tran_low_t *input, int stride,
3027 : TX_TYPE_1D tx_type, int bd,
3028 : uint16_t *dest) {
3029 : assert(tx_type < TX_TYPES_1D);
3030 : static const highbd_transform_1d IHT[] = { aom_highbd_idct16_c,
3031 : aom_highbd_iadst16_c,
3032 : aom_highbd_iadst16_c,
3033 : highbd_iidtx16_c };
3034 : const highbd_transform_1d inv_tx = IHT[tx_type];
3035 : tran_low_t out[16];
3036 : inv_tx(input, out, bd);
3037 : for (int i = 0; i < 16; ++i) {
3038 : out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
3039 : dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
3040 : ROUND_POWER_OF_TWO(out[i], 5), bd);
3041 : }
3042 : }
3043 :
3044 : void av1_hbd_dpcm_inv_txfm_add_32_c(const tran_low_t *input, int stride,
3045 : TX_TYPE_1D tx_type, int bd,
3046 : uint16_t *dest) {
3047 : assert(tx_type < TX_TYPES_1D);
3048 : static const highbd_transform_1d IHT[] = { aom_highbd_idct32_c,
3049 : highbd_ihalfright32_c,
3050 : highbd_ihalfright32_c,
3051 : highbd_iidtx32_c };
3052 : const highbd_transform_1d inv_tx = IHT[tx_type];
3053 : tran_low_t out[32];
3054 : inv_tx(input, out, bd);
3055 : for (int i = 0; i < 32; ++i) {
3056 : dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
3057 : ROUND_POWER_OF_TWO(out[i], 4), bd);
3058 : }
3059 : }
3060 :
3061 : hbd_dpcm_inv_txfm_add_func av1_get_hbd_dpcm_inv_txfm_add_func(int tx_length) {
3062 : switch (tx_length) {
3063 : case 4: return av1_hbd_dpcm_inv_txfm_add_4_c;
3064 : case 8: return av1_hbd_dpcm_inv_txfm_add_8_c;
3065 : case 16: return av1_hbd_dpcm_inv_txfm_add_16_c;
3066 : case 32:
3067 : return av1_hbd_dpcm_inv_txfm_add_32_c;
3068 : // TODO(huisu): add support for TX_64X64.
3069 : default: assert(0); return NULL;
3070 : }
3071 : }
3072 : #endif // CONFIG_HIGHBITDEPTH
3073 : #endif // CONFIG_DPCM_INTRA