Line data Source code
1 : /*
2 : * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include <assert.h>
12 : #include <string.h>
13 :
14 : #include "./vpx_config.h"
15 : #include "./vpx_dsp_rtcd.h"
16 : #include "vpx/vpx_integer.h"
17 : #include "vpx_dsp/vpx_convolve.h"
18 : #include "vpx_dsp/vpx_dsp_common.h"
19 : #include "vpx_dsp/vpx_filter.h"
20 : #include "vpx_ports/mem.h"
21 :
22 0 : static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
23 : uint8_t *dst, ptrdiff_t dst_stride,
24 : const InterpKernel *x_filters, int x0_q4,
25 : int x_step_q4, int w, int h) {
26 : int x, y;
27 0 : src -= SUBPEL_TAPS / 2 - 1;
28 :
29 0 : for (y = 0; y < h; ++y) {
30 0 : int x_q4 = x0_q4;
31 0 : for (x = 0; x < w; ++x) {
32 0 : const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
33 0 : const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
34 0 : int k, sum = 0;
35 0 : for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
36 0 : dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
37 0 : x_q4 += x_step_q4;
38 : }
39 0 : src += src_stride;
40 0 : dst += dst_stride;
41 : }
42 0 : }
43 :
44 0 : static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
45 : uint8_t *dst, ptrdiff_t dst_stride,
46 : const InterpKernel *x_filters, int x0_q4,
47 : int x_step_q4, int w, int h) {
48 : int x, y;
49 0 : src -= SUBPEL_TAPS / 2 - 1;
50 :
51 0 : for (y = 0; y < h; ++y) {
52 0 : int x_q4 = x0_q4;
53 0 : for (x = 0; x < w; ++x) {
54 0 : const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
55 0 : const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
56 0 : int k, sum = 0;
57 0 : for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
58 0 : dst[x] = ROUND_POWER_OF_TWO(
59 : dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
60 0 : x_q4 += x_step_q4;
61 : }
62 0 : src += src_stride;
63 0 : dst += dst_stride;
64 : }
65 0 : }
66 :
67 0 : static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
68 : uint8_t *dst, ptrdiff_t dst_stride,
69 : const InterpKernel *y_filters, int y0_q4,
70 : int y_step_q4, int w, int h) {
71 : int x, y;
72 0 : src -= src_stride * (SUBPEL_TAPS / 2 - 1);
73 :
74 0 : for (x = 0; x < w; ++x) {
75 0 : int y_q4 = y0_q4;
76 0 : for (y = 0; y < h; ++y) {
77 0 : const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
78 0 : const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
79 0 : int k, sum = 0;
80 0 : for (k = 0; k < SUBPEL_TAPS; ++k)
81 0 : sum += src_y[k * src_stride] * y_filter[k];
82 0 : dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
83 0 : y_q4 += y_step_q4;
84 : }
85 0 : ++src;
86 0 : ++dst;
87 : }
88 0 : }
89 :
90 0 : static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
91 : uint8_t *dst, ptrdiff_t dst_stride,
92 : const InterpKernel *y_filters, int y0_q4,
93 : int y_step_q4, int w, int h) {
94 : int x, y;
95 0 : src -= src_stride * (SUBPEL_TAPS / 2 - 1);
96 :
97 0 : for (x = 0; x < w; ++x) {
98 0 : int y_q4 = y0_q4;
99 0 : for (y = 0; y < h; ++y) {
100 0 : const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
101 0 : const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
102 0 : int k, sum = 0;
103 0 : for (k = 0; k < SUBPEL_TAPS; ++k)
104 0 : sum += src_y[k * src_stride] * y_filter[k];
105 0 : dst[y * dst_stride] = ROUND_POWER_OF_TWO(
106 : dst[y * dst_stride] +
107 : clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
108 : 1);
109 0 : y_q4 += y_step_q4;
110 : }
111 0 : ++src;
112 0 : ++dst;
113 : }
114 0 : }
115 :
// Separable 2-D sub-pel filter: horizontal pass into a fixed-size
// intermediate buffer, then a vertical pass over that buffer.
static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                     ptrdiff_t dst_stride, const InterpKernel *const x_filters,
                     int x0_q4, int x_step_q4,
                     const InterpKernel *const y_filters, int y0_q4,
                     int y_step_q4, int w, int h) {
  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
  // 2d filtering proceeds in 2 steps:
  //   (1) Interpolate horizontally into an intermediate buffer, temp.
  //   (2) Interpolate temp vertically to derive the sub-pixel result.
  // Deriving the maximum number of rows in the temp buffer (135):
  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
  // --Largest block size is 64x64 pixels.
  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
  //   original frame (in 1/16th pixel units).
  // --Must round-up because block may be located at sub-pixel position.
  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
  uint8_t temp[64 * 135];
  // Rows of temp the vertical pass will read, including the filter tails.
  const int intermediate_height =
      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

  // Enforce the limits the fixed-size temp buffer was derived from.
  assert(w <= 64);
  assert(h <= 64);
  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);

  // Start the horizontal pass (SUBPEL_TAPS / 2 - 1) rows above the block so
  // temp also contains the rows the vertical filter taps reach above it.
  convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
                 x_filters, x0_q4, x_step_q4, w, intermediate_height);
  // The vertical pass internally backs up by the same amount, so offset
  // forward here to re-center it on the block's first row.
  convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
                y_filters, y0_q4, y_step_q4, w, h);
}
147 :
148 0 : void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
149 : uint8_t *dst, ptrdiff_t dst_stride,
150 : const int16_t *filter_x, int x_step_q4,
151 : const int16_t *filter_y, int y_step_q4, int w,
152 : int h) {
153 0 : const InterpKernel *const filters_x = get_filter_base(filter_x);
154 0 : const int x0_q4 = get_filter_offset(filter_x, filters_x);
155 :
156 : (void)filter_y;
157 : (void)y_step_q4;
158 :
159 0 : convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
160 : w, h);
161 0 : }
162 :
163 0 : void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
164 : uint8_t *dst, ptrdiff_t dst_stride,
165 : const int16_t *filter_x, int x_step_q4,
166 : const int16_t *filter_y, int y_step_q4, int w,
167 : int h) {
168 0 : const InterpKernel *const filters_x = get_filter_base(filter_x);
169 0 : const int x0_q4 = get_filter_offset(filter_x, filters_x);
170 :
171 : (void)filter_y;
172 : (void)y_step_q4;
173 :
174 0 : convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
175 : x_step_q4, w, h);
176 0 : }
177 :
178 0 : void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
179 : uint8_t *dst, ptrdiff_t dst_stride,
180 : const int16_t *filter_x, int x_step_q4,
181 : const int16_t *filter_y, int y_step_q4, int w,
182 : int h) {
183 0 : const InterpKernel *const filters_y = get_filter_base(filter_y);
184 0 : const int y0_q4 = get_filter_offset(filter_y, filters_y);
185 :
186 : (void)filter_x;
187 : (void)x_step_q4;
188 :
189 0 : convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4,
190 : w, h);
191 0 : }
192 :
193 0 : void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
194 : uint8_t *dst, ptrdiff_t dst_stride,
195 : const int16_t *filter_x, int x_step_q4,
196 : const int16_t *filter_y, int y_step_q4, int w,
197 : int h) {
198 0 : const InterpKernel *const filters_y = get_filter_base(filter_y);
199 0 : const int y0_q4 = get_filter_offset(filter_y, filters_y);
200 :
201 : (void)filter_x;
202 : (void)x_step_q4;
203 :
204 0 : convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
205 : y_step_q4, w, h);
206 0 : }
207 :
208 0 : void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
209 : ptrdiff_t dst_stride, const int16_t *filter_x,
210 : int x_step_q4, const int16_t *filter_y, int y_step_q4,
211 : int w, int h) {
212 0 : const InterpKernel *const filters_x = get_filter_base(filter_x);
213 0 : const int x0_q4 = get_filter_offset(filter_x, filters_x);
214 0 : const InterpKernel *const filters_y = get_filter_base(filter_y);
215 0 : const int y0_q4 = get_filter_offset(filter_y, filters_y);
216 :
217 0 : convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
218 : filters_y, y0_q4, y_step_q4, w, h);
219 0 : }
220 :
221 0 : void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
222 : ptrdiff_t dst_stride, const int16_t *filter_x,
223 : int x_step_q4, const int16_t *filter_y, int y_step_q4,
224 : int w, int h) {
225 : // Fixed size intermediate buffer places limits on parameters.
226 : DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
227 0 : assert(w <= 64);
228 0 : assert(h <= 64);
229 :
230 0 : vpx_convolve8_c(src, src_stride, temp, 64, filter_x, x_step_q4, filter_y,
231 : y_step_q4, w, h);
232 0 : vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
233 0 : }
234 :
// Straight block copy: no filtering is applied, so all filter arguments are
// ignored. Copies h rows of w bytes from src to dst.
void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const int16_t *filter_x,
                         int filter_x_stride, const int16_t *filter_y,
                         int filter_y_stride, int w, int h) {
  int row;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (row = 0; row < h; ++row) {
    memcpy(dst + (ptrdiff_t)row * dst_stride, src + (ptrdiff_t)row * src_stride,
           w);
  }
}
252 :
// Round-average src into dst in place: dst = (dst + src + 1) >> 1 per pixel.
// No filtering is applied, so all filter arguments are ignored.
void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                        ptrdiff_t dst_stride, const int16_t *filter_x,
                        int filter_x_stride, const int16_t *filter_y,
                        int filter_y_stride, int w, int h) {
  int row, col;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      // Equivalent to ROUND_POWER_OF_TWO(dst[col] + src[col], 1).
      dst[col] = (uint8_t)((dst[col] + src[col] + 1) >> 1);
    }
    src += src_stride;
    dst += dst_stride;
  }
}
270 :
// Scaled-prediction horizontal entry point. In the C reference
// implementation this is identical to the unscaled 8-tap path, so forward.
void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                        ptrdiff_t dst_stride, const int16_t *filter_x,
                        int x_step_q4, const int16_t *filter_y, int y_step_q4,
                        int w, int h) {
  vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                        filter_y, y_step_q4, w, h);
}
278 :
// Scaled-prediction vertical entry point. In the C reference implementation
// this is identical to the unscaled 8-tap path, so forward.
void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                       ptrdiff_t dst_stride, const int16_t *filter_x,
                       int x_step_q4, const int16_t *filter_y, int y_step_q4,
                       int w, int h) {
  vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                       filter_y, y_step_q4, w, h);
}
286 :
// Scaled-prediction 2-D entry point. In the C reference implementation this
// is identical to the unscaled 8-tap path, so forward.
void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                     ptrdiff_t dst_stride, const int16_t *filter_x,
                     int x_step_q4, const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  vpx_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                  filter_y, y_step_q4, w, h);
}
294 :
// Scaled-prediction horizontal averaging entry point; forwards to the
// unscaled averaging path (identical in the C reference implementation).
void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4, int w,
                            int h) {
  vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
                            x_step_q4, filter_y, y_step_q4, w, h);
}
303 :
// Scaled-prediction vertical averaging entry point; forwards to the
// unscaled averaging path (identical in the C reference implementation).
void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4, int w,
                           int h) {
  vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
                           x_step_q4, filter_y, y_step_q4, w, h);
}
312 :
// Scaled-prediction 2-D averaging entry point; forwards to the unscaled
// averaging path (identical in the C reference implementation).
void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const int16_t *filter_x,
                         int x_step_q4, const int16_t *filter_y, int y_step_q4,
                         int w, int h) {
  vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                      filter_y, y_step_q4, w, h);
}
320 :
321 : #if CONFIG_VP9_HIGHBITDEPTH
322 : static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
323 : uint8_t *dst8, ptrdiff_t dst_stride,
324 : const InterpKernel *x_filters, int x0_q4,
325 : int x_step_q4, int w, int h, int bd) {
326 : int x, y;
327 : const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
328 : uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
329 : src -= SUBPEL_TAPS / 2 - 1;
330 :
331 : for (y = 0; y < h; ++y) {
332 : int x_q4 = x0_q4;
333 : for (x = 0; x < w; ++x) {
334 : const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
335 : const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
336 : int k, sum = 0;
337 : for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
338 : dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
339 : x_q4 += x_step_q4;
340 : }
341 : src += src_stride;
342 : dst += dst_stride;
343 : }
344 : }
345 :
346 : static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
347 : uint8_t *dst8, ptrdiff_t dst_stride,
348 : const InterpKernel *x_filters, int x0_q4,
349 : int x_step_q4, int w, int h, int bd) {
350 : int x, y;
351 : const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
352 : uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
353 : src -= SUBPEL_TAPS / 2 - 1;
354 :
355 : for (y = 0; y < h; ++y) {
356 : int x_q4 = x0_q4;
357 : for (x = 0; x < w; ++x) {
358 : const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
359 : const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
360 : int k, sum = 0;
361 : for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
362 : dst[x] = ROUND_POWER_OF_TWO(
363 : dst[x] + clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
364 : 1);
365 : x_q4 += x_step_q4;
366 : }
367 : src += src_stride;
368 : dst += dst_stride;
369 : }
370 : }
371 :
372 : static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
373 : uint8_t *dst8, ptrdiff_t dst_stride,
374 : const InterpKernel *y_filters, int y0_q4,
375 : int y_step_q4, int w, int h, int bd) {
376 : int x, y;
377 : const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
378 : uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
379 : src -= src_stride * (SUBPEL_TAPS / 2 - 1);
380 :
381 : for (x = 0; x < w; ++x) {
382 : int y_q4 = y0_q4;
383 : for (y = 0; y < h; ++y) {
384 : const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
385 : const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
386 : int k, sum = 0;
387 : for (k = 0; k < SUBPEL_TAPS; ++k)
388 : sum += src_y[k * src_stride] * y_filter[k];
389 : dst[y * dst_stride] =
390 : clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
391 : y_q4 += y_step_q4;
392 : }
393 : ++src;
394 : ++dst;
395 : }
396 : }
397 :
398 : static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
399 : uint8_t *dst8, ptrdiff_t dst_stride,
400 : const InterpKernel *y_filters, int y0_q4,
401 : int y_step_q4, int w, int h, int bd) {
402 : int x, y;
403 : const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
404 : uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
405 : src -= src_stride * (SUBPEL_TAPS / 2 - 1);
406 :
407 : for (x = 0; x < w; ++x) {
408 : int y_q4 = y0_q4;
409 : for (y = 0; y < h; ++y) {
410 : const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
411 : const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
412 : int k, sum = 0;
413 : for (k = 0; k < SUBPEL_TAPS; ++k)
414 : sum += src_y[k * src_stride] * y_filter[k];
415 : dst[y * dst_stride] = ROUND_POWER_OF_TWO(
416 : dst[y * dst_stride] +
417 : clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
418 : 1);
419 : y_q4 += y_step_q4;
420 : }
421 : ++src;
422 : ++dst;
423 : }
424 : }
425 :
// High-bit-depth separable 2-D sub-pel filter: horizontal pass into a
// fixed-size uint16_t intermediate buffer, then a vertical pass over it.
static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const InterpKernel *const x_filters, int x0_q4,
                            int x_step_q4, const InterpKernel *const y_filters,
                            int y0_q4, int y_step_q4, int w, int h, int bd) {
  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
  // 2d filtering proceeds in 2 steps:
  //   (1) Interpolate horizontally into an intermediate buffer, temp.
  //   (2) Interpolate temp vertically to derive the sub-pixel result.
  // Deriving the maximum number of rows in the temp buffer (135):
  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
  // --Largest block size is 64x64 pixels.
  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
  //   original frame (in 1/16th pixel units).
  // --Must round-up because block may be located at sub-pixel position.
  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
  uint16_t temp[64 * 135];
  // Rows of temp the vertical pass will read, including the filter tails.
  const int intermediate_height =
      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

  // Enforce the limits the fixed-size temp buffer was derived from.
  assert(w <= 64);
  assert(h <= 64);
  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);

  // Start the horizontal pass (SUBPEL_TAPS / 2 - 1) rows above the block so
  // temp also contains the rows the vertical filter taps reach above it.
  highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
                        CONVERT_TO_BYTEPTR(temp), 64, x_filters, x0_q4,
                        x_step_q4, w, intermediate_height, bd);
  // The vertical pass internally backs up by the same amount, so offset
  // forward here to re-center it on the block's first row.
  highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1),
                       64, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h,
                       bd);
}
459 :
460 : void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
461 : uint8_t *dst, ptrdiff_t dst_stride,
462 : const int16_t *filter_x, int x_step_q4,
463 : const int16_t *filter_y, int y_step_q4, int w,
464 : int h, int bd) {
465 : const InterpKernel *const filters_x = get_filter_base(filter_x);
466 : const int x0_q4 = get_filter_offset(filter_x, filters_x);
467 :
468 : (void)filter_y;
469 : (void)y_step_q4;
470 :
471 : highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
472 : x_step_q4, w, h, bd);
473 : }
474 :
475 : void vpx_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
476 : uint8_t *dst, ptrdiff_t dst_stride,
477 : const int16_t *filter_x, int x_step_q4,
478 : const int16_t *filter_y, int y_step_q4,
479 : int w, int h, int bd) {
480 : const InterpKernel *const filters_x = get_filter_base(filter_x);
481 : const int x0_q4 = get_filter_offset(filter_x, filters_x);
482 :
483 : (void)filter_y;
484 : (void)y_step_q4;
485 :
486 : highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
487 : x_step_q4, w, h, bd);
488 : }
489 :
490 : void vpx_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
491 : uint8_t *dst, ptrdiff_t dst_stride,
492 : const int16_t *filter_x, int x_step_q4,
493 : const int16_t *filter_y, int y_step_q4, int w,
494 : int h, int bd) {
495 : const InterpKernel *const filters_y = get_filter_base(filter_y);
496 : const int y0_q4 = get_filter_offset(filter_y, filters_y);
497 :
498 : (void)filter_x;
499 : (void)x_step_q4;
500 :
501 : highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
502 : y_step_q4, w, h, bd);
503 : }
504 :
505 : void vpx_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
506 : uint8_t *dst, ptrdiff_t dst_stride,
507 : const int16_t *filter_x, int x_step_q4,
508 : const int16_t *filter_y, int y_step_q4,
509 : int w, int h, int bd) {
510 : const InterpKernel *const filters_y = get_filter_base(filter_y);
511 : const int y0_q4 = get_filter_offset(filter_y, filters_y);
512 :
513 : (void)filter_x;
514 : (void)x_step_q4;
515 :
516 : highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
517 : y_step_q4, w, h, bd);
518 : }
519 :
520 : void vpx_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
521 : uint8_t *dst, ptrdiff_t dst_stride,
522 : const int16_t *filter_x, int x_step_q4,
523 : const int16_t *filter_y, int y_step_q4, int w,
524 : int h, int bd) {
525 : const InterpKernel *const filters_x = get_filter_base(filter_x);
526 : const int x0_q4 = get_filter_offset(filter_x, filters_x);
527 : const InterpKernel *const filters_y = get_filter_base(filter_y);
528 : const int y0_q4 = get_filter_offset(filter_y, filters_y);
529 :
530 : highbd_convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
531 : filters_y, y0_q4, y_step_q4, w, h, bd);
532 : }
533 :
534 : void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
535 : uint8_t *dst, ptrdiff_t dst_stride,
536 : const int16_t *filter_x, int x_step_q4,
537 : const int16_t *filter_y, int y_step_q4, int w,
538 : int h, int bd) {
539 : // Fixed size intermediate buffer places limits on parameters.
540 : DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]);
541 : assert(w <= 64);
542 : assert(h <= 64);
543 :
544 : vpx_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,
545 : filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
546 : vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride, NULL,
547 : 0, NULL, 0, w, h, bd);
548 : }
549 :
// High-bit-depth block copy: h rows of w uint16_t samples; no filtering is
// applied, so the filter and bd arguments are ignored.
void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
                                uint8_t *dst8, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int filter_x_stride,
                                const int16_t *filter_y, int filter_y_stride,
                                int w, int h, int bd) {
  int row;
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;
  (void)bd;

  for (row = 0; row < h; ++row) {
    memcpy(dst + (ptrdiff_t)row * dst_stride, src + (ptrdiff_t)row * src_stride,
           w * sizeof(uint16_t));
  }
}
571 :
// High-bit-depth round-average of src into dst: dst = (dst + src + 1) >> 1
// per sample. No filtering; the filter and bd arguments are ignored.
void vpx_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
                               uint8_t *dst8, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int filter_x_stride,
                               const int16_t *filter_y, int filter_y_stride,
                               int w, int h, int bd) {
  int row, col;
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;
  (void)bd;

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      // Equivalent to ROUND_POWER_OF_TWO(dst[col] + src[col], 1).
      dst[col] = (uint16_t)((dst[col] + src[col] + 1) >> 1);
    }
    src += src_stride;
    dst += dst_stride;
  }
}
593 : #endif
|