Line data Source code
1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include "./aom_config.h"
13 : #include "./aom_dsp_rtcd.h"
14 : #include "aom_dsp/x86/convolve.h"
15 :
16 : #if HAVE_SSE2
17 : filter8_1dfunction aom_filter_block1d16_v8_sse2;
18 : filter8_1dfunction aom_filter_block1d16_h8_sse2;
19 : filter8_1dfunction aom_filter_block1d8_v8_sse2;
20 : filter8_1dfunction aom_filter_block1d8_h8_sse2;
21 : filter8_1dfunction aom_filter_block1d4_v8_sse2;
22 : filter8_1dfunction aom_filter_block1d4_h8_sse2;
23 : filter8_1dfunction aom_filter_block1d16_v8_avg_sse2;
24 : filter8_1dfunction aom_filter_block1d16_h8_avg_sse2;
25 : filter8_1dfunction aom_filter_block1d8_v8_avg_sse2;
26 : filter8_1dfunction aom_filter_block1d8_h8_avg_sse2;
27 : filter8_1dfunction aom_filter_block1d4_v8_avg_sse2;
28 : filter8_1dfunction aom_filter_block1d4_h8_avg_sse2;
29 :
30 : filter8_1dfunction aom_filter_block1d16_v2_sse2;
31 : filter8_1dfunction aom_filter_block1d16_h2_sse2;
32 : filter8_1dfunction aom_filter_block1d8_v2_sse2;
33 : filter8_1dfunction aom_filter_block1d8_h2_sse2;
34 : filter8_1dfunction aom_filter_block1d4_v2_sse2;
35 : filter8_1dfunction aom_filter_block1d4_h2_sse2;
36 : filter8_1dfunction aom_filter_block1d16_v2_avg_sse2;
37 : filter8_1dfunction aom_filter_block1d16_h2_avg_sse2;
38 : filter8_1dfunction aom_filter_block1d8_v2_avg_sse2;
39 : filter8_1dfunction aom_filter_block1d8_h2_avg_sse2;
40 : filter8_1dfunction aom_filter_block1d4_v2_avg_sse2;
41 : filter8_1dfunction aom_filter_block1d4_h2_avg_sse2;
42 :
43 : // void aom_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
44 : // uint8_t *dst, ptrdiff_t dst_stride,
45 : // const int16_t *filter_x, int x_step_q4,
46 : // const int16_t *filter_y, int y_step_q4,
47 : // int w, int h);
48 : // void aom_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
49 : // uint8_t *dst, ptrdiff_t dst_stride,
50 : // const int16_t *filter_x, int x_step_q4,
51 : // const int16_t *filter_y, int y_step_q4,
52 : // int w, int h);
53 : // void aom_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
54 : // uint8_t *dst, ptrdiff_t dst_stride,
55 : // const int16_t *filter_x, int x_step_q4,
56 : // const int16_t *filter_y, int y_step_q4,
57 : // int w, int h);
58 : // void aom_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
59 : // uint8_t *dst, ptrdiff_t dst_stride,
60 : // const int16_t *filter_x, int x_step_q4,
61 : // const int16_t *filter_y, int y_step_q4,
62 : // int w, int h);
63 0 : FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
64 0 : FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
65 0 : FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
66 0 : FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);
67 :
68 : // void aom_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
69 : // uint8_t *dst, ptrdiff_t dst_stride,
70 : // const int16_t *filter_x, int x_step_q4,
71 : // const int16_t *filter_y, int y_step_q4,
72 : // int w, int h);
73 : // void aom_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
74 : // uint8_t *dst, ptrdiff_t dst_stride,
75 : // const int16_t *filter_x, int x_step_q4,
76 : // const int16_t *filter_y, int y_step_q4,
77 : // int w, int h);
78 0 : FUN_CONV_2D(, sse2);
79 0 : FUN_CONV_2D(avg_, sse2);
80 :
81 : #if CONFIG_HIGHBITDEPTH && ARCH_X86_64
82 : highbd_filter8_1dfunction aom_highbd_filter_block1d16_v8_sse2;
83 : highbd_filter8_1dfunction aom_highbd_filter_block1d16_h8_sse2;
84 : highbd_filter8_1dfunction aom_highbd_filter_block1d8_v8_sse2;
85 : highbd_filter8_1dfunction aom_highbd_filter_block1d8_h8_sse2;
86 : highbd_filter8_1dfunction aom_highbd_filter_block1d4_v8_sse2;
87 : highbd_filter8_1dfunction aom_highbd_filter_block1d4_h8_sse2;
88 : highbd_filter8_1dfunction aom_highbd_filter_block1d16_v8_avg_sse2;
89 : highbd_filter8_1dfunction aom_highbd_filter_block1d16_h8_avg_sse2;
90 : highbd_filter8_1dfunction aom_highbd_filter_block1d8_v8_avg_sse2;
91 : highbd_filter8_1dfunction aom_highbd_filter_block1d8_h8_avg_sse2;
92 : highbd_filter8_1dfunction aom_highbd_filter_block1d4_v8_avg_sse2;
93 : highbd_filter8_1dfunction aom_highbd_filter_block1d4_h8_avg_sse2;
94 :
95 : highbd_filter8_1dfunction aom_highbd_filter_block1d16_v2_sse2;
96 : highbd_filter8_1dfunction aom_highbd_filter_block1d16_h2_sse2;
97 : highbd_filter8_1dfunction aom_highbd_filter_block1d8_v2_sse2;
98 : highbd_filter8_1dfunction aom_highbd_filter_block1d8_h2_sse2;
99 : highbd_filter8_1dfunction aom_highbd_filter_block1d4_v2_sse2;
100 : highbd_filter8_1dfunction aom_highbd_filter_block1d4_h2_sse2;
101 : highbd_filter8_1dfunction aom_highbd_filter_block1d16_v2_avg_sse2;
102 : highbd_filter8_1dfunction aom_highbd_filter_block1d16_h2_avg_sse2;
103 : highbd_filter8_1dfunction aom_highbd_filter_block1d8_v2_avg_sse2;
104 : highbd_filter8_1dfunction aom_highbd_filter_block1d8_h2_avg_sse2;
105 : highbd_filter8_1dfunction aom_highbd_filter_block1d4_v2_avg_sse2;
106 : highbd_filter8_1dfunction aom_highbd_filter_block1d4_h2_avg_sse2;
107 :
108 : // void aom_highbd_convolve8_horiz_sse2(const uint8_t *src,
109 : // ptrdiff_t src_stride,
110 : // uint8_t *dst,
111 : // ptrdiff_t dst_stride,
112 : // const int16_t *filter_x,
113 : // int x_step_q4,
114 : // const int16_t *filter_y,
115 : // int y_step_q4,
116 : // int w, int h, int bd);
117 : // void aom_highbd_convolve8_vert_sse2(const uint8_t *src,
118 : // ptrdiff_t src_stride,
119 : // uint8_t *dst,
120 : // ptrdiff_t dst_stride,
121 : // const int16_t *filter_x,
122 : // int x_step_q4,
123 : // const int16_t *filter_y,
124 : // int y_step_q4,
125 : // int w, int h, int bd);
126 : // void aom_highbd_convolve8_avg_horiz_sse2(const uint8_t *src,
127 : // ptrdiff_t src_stride,
128 : // uint8_t *dst,
129 : // ptrdiff_t dst_stride,
130 : // const int16_t *filter_x,
131 : // int x_step_q4,
132 : // const int16_t *filter_y,
133 : // int y_step_q4,
134 : // int w, int h, int bd);
135 : // void aom_highbd_convolve8_avg_vert_sse2(const uint8_t *src,
136 : // ptrdiff_t src_stride,
137 : // uint8_t *dst,
138 : // ptrdiff_t dst_stride,
139 : // const int16_t *filter_x,
140 : // int x_step_q4,
141 : // const int16_t *filter_y,
142 : // int y_step_q4,
143 : // int w, int h, int bd);
144 0 : HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
145 0 : HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
146 0 : HIGH_FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
147 0 : HIGH_FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
148 : sse2);
149 :
150 : // void aom_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
151 : // uint8_t *dst, ptrdiff_t dst_stride,
152 : // const int16_t *filter_x, int x_step_q4,
153 : // const int16_t *filter_y, int y_step_q4,
154 : // int w, int h, int bd);
155 : // void aom_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
156 : // uint8_t *dst, ptrdiff_t dst_stride,
157 : // const int16_t *filter_x, int x_step_q4,
158 : // const int16_t *filter_y, int y_step_q4,
159 : // int w, int h, int bd);
160 0 : HIGH_FUN_CONV_2D(, sse2);
161 0 : HIGH_FUN_CONV_2D(avg_, sse2);
162 :
163 : #if CONFIG_LOOP_RESTORATION
// "Add source" variant of the high-bitdepth 2-D convolve.
//
// The SSE2 highbd convolve functions can deal with coefficients up to 32767,
// so instead of a dedicated implementation this adds 128 to tap 3 of each
// filter and redirects to the regular aom_highbd_convolve8_sse2(). The bias
// is removed again before returning.
// NOTE(review): 128 is presumably the fixed-point representation of 1.0 for
// these kernels (i.e. "filter + source pixel") -- confirm against the C
// reference implementation.
//
// Parameters are forwarded unchanged to aom_highbd_convolve8_sse2().
// x_step_q4 and y_step_q4 must both be 16 (asserted).
//
// Caveats:
//  - The bias is applied in place through the caller's pointers (const is
//    cast away), so both filters must live in writable storage, and anything
//    else reading them while this call is in progress sees the biased tap.
//  - filter_x and filter_y may point at the same kernel; in that case the
//    kernel is biased (and restored) exactly once.
void aom_highbd_convolve8_add_src_sse2(const uint8_t *src, ptrdiff_t src_stride,
                                       uint8_t *dst, ptrdiff_t dst_stride,
                                       const int16_t *filter_x, int x_step_q4,
                                       const int16_t *filter_y, int y_step_q4,
                                       int w, int h, int bd) {
  int16_t *const taps_x = (int16_t *)filter_x;
  int16_t *const taps_y = (int16_t *)filter_y;
  // Biasing a shared kernel through both pointers would add 256 to tap 3
  // rather than 128 and convolve with the wrong coefficient.
  const int shared_kernel = (taps_x == taps_y);

  assert(x_step_q4 == 16);
  assert(y_step_q4 == 16);

  taps_x[3] += 128;
  if (!shared_kernel) taps_y[3] += 128;

  aom_highbd_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x,
                            x_step_q4, filter_y, y_step_q4, w, h, bd);

  // Undo the temporary bias so the caller's filters are left as passed in.
  taps_x[3] -= 128;
  if (!shared_kernel) taps_y[3] -= 128;
}
180 : #endif // CONFIG_LOOP_RESTORATION
181 : #endif // CONFIG_HIGHBITDEPTH && ARCH_X86_64
182 : #endif // HAVE_SSE2
|