Line data Source code
1 : /*
2 : * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include <math.h>
12 :
13 : #include "./vp9_rtcd.h"
14 : #include "./vpx_dsp_rtcd.h"
15 : #include "vp9/common/vp9_blockd.h"
16 : #include "vp9/common/vp9_idct.h"
17 : #include "vpx_dsp/inv_txfm.h"
18 : #include "vpx_ports/mem.h"
19 :
20 0 : void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
21 : int tx_type) {
22 0 : const transform_2d IHT_4[] = {
23 : { idct4_c, idct4_c }, // DCT_DCT = 0
24 : { iadst4_c, idct4_c }, // ADST_DCT = 1
25 : { idct4_c, iadst4_c }, // DCT_ADST = 2
26 : { iadst4_c, iadst4_c } // ADST_ADST = 3
27 : };
28 :
29 : int i, j;
30 : tran_low_t out[4 * 4];
31 0 : tran_low_t *outptr = out;
32 : tran_low_t temp_in[4], temp_out[4];
33 :
34 : // inverse transform row vectors
35 0 : for (i = 0; i < 4; ++i) {
36 0 : IHT_4[tx_type].rows(input, outptr);
37 0 : input += 4;
38 0 : outptr += 4;
39 : }
40 :
41 : // inverse transform column vectors
42 0 : for (i = 0; i < 4; ++i) {
43 0 : for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
44 0 : IHT_4[tx_type].cols(temp_in, temp_out);
45 0 : for (j = 0; j < 4; ++j) {
46 0 : dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
47 0 : ROUND_POWER_OF_TWO(temp_out[j], 4));
48 : }
49 : }
50 0 : }
51 :
52 : static const transform_2d IHT_8[] = {
53 : { idct8_c, idct8_c }, // DCT_DCT = 0
54 : { iadst8_c, idct8_c }, // ADST_DCT = 1
55 : { idct8_c, iadst8_c }, // DCT_ADST = 2
56 : { iadst8_c, iadst8_c } // ADST_ADST = 3
57 : };
58 :
59 0 : void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
60 : int tx_type) {
61 : int i, j;
62 : tran_low_t out[8 * 8];
63 0 : tran_low_t *outptr = out;
64 : tran_low_t temp_in[8], temp_out[8];
65 0 : const transform_2d ht = IHT_8[tx_type];
66 :
67 : // inverse transform row vectors
68 0 : for (i = 0; i < 8; ++i) {
69 0 : ht.rows(input, outptr);
70 0 : input += 8;
71 0 : outptr += 8;
72 : }
73 :
74 : // inverse transform column vectors
75 0 : for (i = 0; i < 8; ++i) {
76 0 : for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
77 0 : ht.cols(temp_in, temp_out);
78 0 : for (j = 0; j < 8; ++j) {
79 0 : dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
80 0 : ROUND_POWER_OF_TWO(temp_out[j], 5));
81 : }
82 : }
83 0 : }
84 :
85 : static const transform_2d IHT_16[] = {
86 : { idct16_c, idct16_c }, // DCT_DCT = 0
87 : { iadst16_c, idct16_c }, // ADST_DCT = 1
88 : { idct16_c, iadst16_c }, // DCT_ADST = 2
89 : { iadst16_c, iadst16_c } // ADST_ADST = 3
90 : };
91 :
92 0 : void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
93 : int tx_type) {
94 : int i, j;
95 : tran_low_t out[16 * 16];
96 0 : tran_low_t *outptr = out;
97 : tran_low_t temp_in[16], temp_out[16];
98 0 : const transform_2d ht = IHT_16[tx_type];
99 :
100 : // Rows
101 0 : for (i = 0; i < 16; ++i) {
102 0 : ht.rows(input, outptr);
103 0 : input += 16;
104 0 : outptr += 16;
105 : }
106 :
107 : // Columns
108 0 : for (i = 0; i < 16; ++i) {
109 0 : for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
110 0 : ht.cols(temp_in, temp_out);
111 0 : for (j = 0; j < 16; ++j) {
112 0 : dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
113 0 : ROUND_POWER_OF_TWO(temp_out[j], 6));
114 : }
115 : }
116 0 : }
117 :
118 : // idct
119 0 : void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
120 : int eob) {
121 0 : if (eob > 1)
122 0 : vpx_idct4x4_16_add(input, dest, stride);
123 : else
124 0 : vpx_idct4x4_1_add(input, dest, stride);
125 0 : }
126 :
127 0 : void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
128 : int eob) {
129 0 : if (eob > 1)
130 0 : vpx_iwht4x4_16_add(input, dest, stride);
131 : else
132 0 : vpx_iwht4x4_1_add(input, dest, stride);
133 0 : }
134 :
135 0 : void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
136 : int eob) {
137 : // If dc is 1, then input[0] is the reconstructed value, do not need
138 : // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
139 :
140 : // The calculation can be simplified if there are not many non-zero dct
141 : // coefficients. Use eobs to decide what to do.
142 0 : if (eob == 1)
143 : // DC only DCT coefficient
144 0 : vpx_idct8x8_1_add(input, dest, stride);
145 0 : else if (eob <= 12)
146 0 : vpx_idct8x8_12_add(input, dest, stride);
147 : else
148 0 : vpx_idct8x8_64_add(input, dest, stride);
149 0 : }
150 :
151 0 : void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
152 : int eob) {
153 : /* The calculation can be simplified if there are not many non-zero dct
154 : * coefficients. Use eobs to separate different cases. */
155 0 : if (eob == 1) /* DC only DCT coefficient. */
156 0 : vpx_idct16x16_1_add(input, dest, stride);
157 0 : else if (eob <= 10)
158 0 : vpx_idct16x16_10_add(input, dest, stride);
159 : else
160 0 : vpx_idct16x16_256_add(input, dest, stride);
161 0 : }
162 :
163 0 : void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
164 : int eob) {
165 0 : if (eob == 1)
166 0 : vpx_idct32x32_1_add(input, dest, stride);
167 0 : else if (eob <= 34)
168 : // non-zero coeff only in upper-left 8x8
169 0 : vpx_idct32x32_34_add(input, dest, stride);
170 0 : else if (eob <= 135)
171 : // non-zero coeff only in upper-left 16x16
172 0 : vpx_idct32x32_135_add(input, dest, stride);
173 : else
174 0 : vpx_idct32x32_1024_add(input, dest, stride);
175 0 : }
176 :
177 : // iht
178 0 : void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
179 : int stride, int eob) {
180 0 : if (tx_type == DCT_DCT)
181 0 : vp9_idct4x4_add(input, dest, stride, eob);
182 : else
183 0 : vp9_iht4x4_16_add(input, dest, stride, tx_type);
184 0 : }
185 :
186 0 : void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
187 : int stride, int eob) {
188 0 : if (tx_type == DCT_DCT) {
189 0 : vp9_idct8x8_add(input, dest, stride, eob);
190 : } else {
191 0 : vp9_iht8x8_64_add(input, dest, stride, tx_type);
192 : }
193 0 : }
194 :
195 0 : void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
196 : int stride, int eob) {
197 0 : if (tx_type == DCT_DCT) {
198 0 : vp9_idct16x16_add(input, dest, stride, eob);
199 : } else {
200 0 : vp9_iht16x16_256_add(input, dest, stride, tx_type);
201 : }
202 0 : }
203 :
204 : #if CONFIG_VP9_HIGHBITDEPTH
205 :
206 : void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
207 : int stride, int tx_type, int bd) {
208 : const highbd_transform_2d IHT_4[] = {
209 : { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0
210 : { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1
211 : { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2
212 : { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3
213 : };
214 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
215 :
216 : int i, j;
217 : tran_low_t out[4 * 4];
218 : tran_low_t *outptr = out;
219 : tran_low_t temp_in[4], temp_out[4];
220 :
221 : // Inverse transform row vectors.
222 : for (i = 0; i < 4; ++i) {
223 : IHT_4[tx_type].rows(input, outptr, bd);
224 : input += 4;
225 : outptr += 4;
226 : }
227 :
228 : // Inverse transform column vectors.
229 : for (i = 0; i < 4; ++i) {
230 : for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
231 : IHT_4[tx_type].cols(temp_in, temp_out, bd);
232 : for (j = 0; j < 4; ++j) {
233 : dest[j * stride + i] = highbd_clip_pixel_add(
234 : dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
235 : }
236 : }
237 : }
238 :
239 : static const highbd_transform_2d HIGH_IHT_8[] = {
240 : { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0
241 : { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1
242 : { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2
243 : { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3
244 : };
245 :
246 : void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
247 : int stride, int tx_type, int bd) {
248 : int i, j;
249 : tran_low_t out[8 * 8];
250 : tran_low_t *outptr = out;
251 : tran_low_t temp_in[8], temp_out[8];
252 : const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
253 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
254 :
255 : // Inverse transform row vectors.
256 : for (i = 0; i < 8; ++i) {
257 : ht.rows(input, outptr, bd);
258 : input += 8;
259 : outptr += 8;
260 : }
261 :
262 : // Inverse transform column vectors.
263 : for (i = 0; i < 8; ++i) {
264 : for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
265 : ht.cols(temp_in, temp_out, bd);
266 : for (j = 0; j < 8; ++j) {
267 : dest[j * stride + i] = highbd_clip_pixel_add(
268 : dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
269 : }
270 : }
271 : }
272 :
273 : static const highbd_transform_2d HIGH_IHT_16[] = {
274 : { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0
275 : { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1
276 : { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2
277 : { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3
278 : };
279 :
280 : void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
281 : int stride, int tx_type, int bd) {
282 : int i, j;
283 : tran_low_t out[16 * 16];
284 : tran_low_t *outptr = out;
285 : tran_low_t temp_in[16], temp_out[16];
286 : const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
287 : uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
288 :
289 : // Rows
290 : for (i = 0; i < 16; ++i) {
291 : ht.rows(input, outptr, bd);
292 : input += 16;
293 : outptr += 16;
294 : }
295 :
296 : // Columns
297 : for (i = 0; i < 16; ++i) {
298 : for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
299 : ht.cols(temp_in, temp_out, bd);
300 : for (j = 0; j < 16; ++j) {
301 : dest[j * stride + i] = highbd_clip_pixel_add(
302 : dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
303 : }
304 : }
305 : }
306 :
307 : // idct
308 : void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
309 : int eob, int bd) {
310 : if (eob > 1)
311 : vpx_highbd_idct4x4_16_add(input, dest, stride, bd);
312 : else
313 : vpx_highbd_idct4x4_1_add(input, dest, stride, bd);
314 : }
315 :
316 : void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
317 : int eob, int bd) {
318 : if (eob > 1)
319 : vpx_highbd_iwht4x4_16_add(input, dest, stride, bd);
320 : else
321 : vpx_highbd_iwht4x4_1_add(input, dest, stride, bd);
322 : }
323 :
324 : void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
325 : int eob, int bd) {
326 : // If dc is 1, then input[0] is the reconstructed value, do not need
327 : // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
328 :
329 : // The calculation can be simplified if there are not many non-zero dct
330 : // coefficients. Use eobs to decide what to do.
331 : // DC only DCT coefficient
332 : if (eob == 1) {
333 : vpx_highbd_idct8x8_1_add(input, dest, stride, bd);
334 : } else if (eob <= 12) {
335 : vpx_highbd_idct8x8_12_add(input, dest, stride, bd);
336 : } else {
337 : vpx_highbd_idct8x8_64_add(input, dest, stride, bd);
338 : }
339 : }
340 :
341 : void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
342 : int stride, int eob, int bd) {
343 : // The calculation can be simplified if there are not many non-zero dct
344 : // coefficients. Use eobs to separate different cases.
345 : // DC only DCT coefficient.
346 : if (eob == 1) {
347 : vpx_highbd_idct16x16_1_add(input, dest, stride, bd);
348 : } else if (eob <= 10) {
349 : vpx_highbd_idct16x16_10_add(input, dest, stride, bd);
350 : } else {
351 : vpx_highbd_idct16x16_256_add(input, dest, stride, bd);
352 : }
353 : }
354 :
355 : void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
356 : int stride, int eob, int bd) {
357 : // Non-zero coeff only in upper-left 8x8
358 : if (eob == 1) {
359 : vpx_highbd_idct32x32_1_add(input, dest, stride, bd);
360 : } else if (eob <= 34) {
361 : vpx_highbd_idct32x32_34_add(input, dest, stride, bd);
362 : } else {
363 : vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);
364 : }
365 : }
366 :
367 : // iht
368 : void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
369 : uint8_t *dest, int stride, int eob, int bd) {
370 : if (tx_type == DCT_DCT)
371 : vp9_highbd_idct4x4_add(input, dest, stride, eob, bd);
372 : else
373 : vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
374 : }
375 :
376 : void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
377 : uint8_t *dest, int stride, int eob, int bd) {
378 : if (tx_type == DCT_DCT) {
379 : vp9_highbd_idct8x8_add(input, dest, stride, eob, bd);
380 : } else {
381 : vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
382 : }
383 : }
384 :
385 : void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
386 : uint8_t *dest, int stride, int eob, int bd) {
387 : if (tx_type == DCT_DCT) {
388 : vp9_highbd_idct16x16_add(input, dest, stride, eob, bd);
389 : } else {
390 : vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
391 : }
392 : }
393 : #endif // CONFIG_VP9_HIGHBITDEPTH
|