Line data Source code
1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include "./av1_rtcd.h"
13 : #include "av1/common/enums.h"
14 : #include "av1/common/av1_txfm.h"
15 : #include "av1/common/x86/av1_txfm1d_sse4.h"
16 :
// Copies a square block of int16_t samples into a densely packed int32_t
// array, widening each sample.
//
//   input       - first sample of the source block; consecutive rows start
//                 `stride` elements apart.
//   stride      - element distance between the starts of two source rows.
//   output      - destination, written as txfm1d_size rows of txfm1d_size
//                 contiguous int32_t values (no padding between rows).
//   txfm1d_size - side length of the block, in samples.
static INLINE void int16_array_with_stride_to_int32_array_without_stride(
    const int16_t *input, int stride, int32_t *output, int txfm1d_size) {
  int row, col;
  for (row = 0; row < txfm1d_size; ++row) {
    // Per-row base pointers: strided on the source side, packed on the
    // destination side.
    const int16_t *const src = input + row * stride;
    int32_t *const dst = output + row * txfm1d_size;
    for (col = 0; col < txfm1d_size; ++col) {
      dst[col] = (int32_t)src[col];
    }
  }
}
26 :
// Function-pointer type for the 1-D transform kernels selected by
// fwd_txfm_type_to_func() and invoked from fwd_txfm2d_sse4_1(). A kernel takes
// a read-only __m128i `input` array, an __m128i `output` array, and the
// cos_bit / stage_range tables taken from a 1-D transform config.
// NOTE(review): the name says SSE2 but the kernels bound to it in this file
// carry an _sse4_1 suffix.
27 : typedef void (*TxfmFuncSSE2)(const __m128i *input, __m128i *output,
28 : const int8_t *cos_bit, const int8_t *stage_range);
29 :
30 0 : static INLINE TxfmFuncSSE2 fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
31 0 : switch (txfm_type) {
32 0 : case TXFM_TYPE_DCT32: return av1_fdct32_new_sse4_1; break;
33 0 : case TXFM_TYPE_ADST32: return av1_fadst32_new_sse4_1; break;
34 0 : default: assert(0);
35 : }
36 : return NULL;
37 : }
38 :
39 0 : static INLINE void fwd_txfm2d_sse4_1(const int16_t *input, int32_t *output,
40 : const int stride,
41 : const TXFM_2D_FLIP_CFG *cfg,
42 : int32_t *txfm_buf) {
43 : // TODO(sarahparker) must correct for rectangular transforms in follow up
44 0 : const int txfm_size = cfg->row_cfg->txfm_size;
45 0 : const int8_t *shift = cfg->row_cfg->shift;
46 0 : const int8_t *stage_range_col = cfg->col_cfg->stage_range;
47 0 : const int8_t *stage_range_row = cfg->row_cfg->stage_range;
48 0 : const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
49 0 : const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
50 0 : const TxfmFuncSSE2 txfm_func_col =
51 0 : fwd_txfm_type_to_func(cfg->col_cfg->txfm_type);
52 0 : const TxfmFuncSSE2 txfm_func_row =
53 0 : fwd_txfm_type_to_func(cfg->row_cfg->txfm_type);
54 :
55 0 : __m128i *buf_128 = (__m128i *)txfm_buf;
56 0 : __m128i *out_128 = (__m128i *)output;
57 0 : int num_per_128 = 4;
58 0 : int txfm2d_size_128 = txfm_size * txfm_size / num_per_128;
59 :
60 0 : int16_array_with_stride_to_int32_array_without_stride(input, stride, txfm_buf,
61 : txfm_size);
62 0 : round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[0]);
63 0 : txfm_func_col(out_128, buf_128, cos_bit_col, stage_range_col);
64 0 : round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[1]);
65 0 : transpose_32(txfm_size, out_128, buf_128);
66 0 : txfm_func_row(buf_128, out_128, cos_bit_row, stage_range_row);
67 0 : round_shift_array_32_sse4_1(out_128, buf_128, txfm2d_size_128, -shift[2]);
68 0 : transpose_32(txfm_size, buf_128, out_128);
69 0 : }
70 :
71 0 : void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output,
72 : int stride, int tx_type, int bd) {
73 : DECLARE_ALIGNED(16, int32_t, txfm_buf[1024]);
74 0 : TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X32);
75 : (void)bd;
76 0 : fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
77 0 : }
78 :
79 0 : void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
80 : int stride, int tx_type, int bd) {
81 : DECLARE_ALIGNED(16, int32_t, txfm_buf[4096]);
82 0 : TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_64x64_cfg(tx_type);
83 : (void)bd;
84 0 : fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
85 0 : }
|