/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
#include <stdlib.h>

#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"

#include "aom_ports/mem.h"
#include "aom/aom_integer.h"

#include "aom_dsp/variance.h"
#include "aom_dsp/aom_filter.h"
#include "aom_dsp/blend.h"

uint32_t aom_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
                            int b_stride) {
  int distortion = 0;
  int r, c;

  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) {
      int diff = a[c] - b[c];
      distortion += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }

  return distortion;
}

uint32_t aom_get_mb_ss_c(const int16_t *a) {
  unsigned int i, sum = 0;

  for (i = 0; i < 256; ++i) {
    sum += a[i] * a[i];
  }

  return sum;
}

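// bilinear_filters_2t[] is indexed in 1/8-pel steps, so an offset of 4
// selects the half-pel taps {64, 64}. These helpers evaluate the horizontal,
// vertical, and diagonal half-pixel positions of a 16x16 block.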
uint32_t aom_variance_halfpixvar16x16_h_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 4, 0, b, b_stride, sse);
}

uint32_t aom_variance_halfpixvar16x16_v_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 0, 4, b, b_stride, sse);
}

uint32_t aom_variance_halfpixvar16x16_hv_c(const uint8_t *a, int a_stride,
                                           const uint8_t *b, int b_stride,
                                           uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 4, 4, b, b_stride, sse);
}

static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
                     int b_stride, int w, int h, uint32_t *sse, int *sum) {
  int i, j;

  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      *sse += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }
}

uint32_t aom_sse_odd_size(const uint8_t *a, int a_stride, const uint8_t *b,
                          int b_stride, int w, int h) {
  uint32_t sse;
  int sum;
  variance(a, a_stride, b, b_stride, w, h, &sse, &sum);
  return sse;
}

// Applies a 1-D 2-tap bilinear filter to the source block in either the
// horizontal or vertical direction to produce the filtered output block. Used
// to implement the first pass of the 2-D separable filter.
//
// Produces uint16_t output to retain precision for the next pass. The two
// filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
// filter is applied horizontally (pixel_step = 1) or vertically
// (pixel_step = stride). It defines the offset required to move from one
// input to the next.
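// For example, with FILTER_BITS == 7 the half-pel taps {64, 64} average two
// neighboring inputs with rounding: a[0] == 10 and a[pixel_step] == 20 give
// ROUND_POWER_OF_TWO(10 * 64 + 20 * 64, 7) == 15.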
static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b,
                                              unsigned int src_pixels_per_line,
                                              int pixel_step,
                                              unsigned int output_height,
                                              unsigned int output_width,
                                              const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);

      ++a;
    }

    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}

// Applies a 1-D 2-tap bilinear filter to the source block in either the
// horizontal or vertical direction to produce the filtered output block. Used
// to implement the second pass of the 2-D separable filter.
//
// Requires 16-bit input as produced by var_filter_block2d_bil_first_pass. The
// two filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
// filter is applied horizontally (pixel_step = 1) or vertically
// (pixel_step = stride). It defines the offset required to move from one input
// to the next. Output is 8-bit.
static void var_filter_block2d_bil_second_pass(const uint16_t *a, uint8_t *b,
                                               unsigned int src_pixels_per_line,
                                               unsigned int pixel_step,
                                               unsigned int output_height,
                                               unsigned int output_width,
                                               const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
      ++a;
    }

    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}

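// The VAR kernels compute the variance of an N = W * H block as
// sse - sum^2 / N (the identity E[X^2] - E[X]^2 scaled by N). For example,
// an 8x8 block where every diff is 2 gives sse = 256 and sum = 128, so the
// variance is 256 - (128 * 128) / 64 = 0, as expected for a constant offset.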
#define VAR(W, H)                                                    \
  uint32_t aom_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                     const uint8_t *b, int b_stride, \
                                     uint32_t *sse) {                \
    int sum;                                                         \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));        \
  }

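// The sub-pixel kernels apply the 2-tap bilinear filter as two separable 1-D
// passes. The intermediate buffer holds H + 1 rows because the vertical
// second pass reads one row past the output height for its second tap.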
#define SUBPIX_VAR(W, H)                                                \
  uint32_t aom_sub_pixel_variance##W##x##H##_c(                         \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,         \
      const uint8_t *b, int b_stride, uint32_t *sse) {                  \
    uint16_t fdata3[(H + 1) * W];                                       \
    uint8_t temp2[H * W];                                               \
                                                                        \
    var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
                                      bilinear_filters_2t[xoffset]);    \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
                                       bilinear_filters_2t[yoffset]);   \
                                                                        \
    return aom_variance##W##x##H##_c(temp2, W, b, b_stride, sse);       \
  }

#define SUBPIX_AVG_VAR(W, H)                                            \
  uint32_t aom_sub_pixel_avg_variance##W##x##H##_c(                     \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,         \
      const uint8_t *b, int b_stride, uint32_t *sse,                    \
      const uint8_t *second_pred) {                                     \
    uint16_t fdata3[(H + 1) * W];                                       \
    uint8_t temp2[H * W];                                               \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                         \
                                                                        \
    var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
                                      bilinear_filters_2t[xoffset]);    \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
                                       bilinear_filters_2t[yoffset]);   \
                                                                        \
    aom_comp_avg_pred(temp3, second_pred, W, H, temp2, W);              \
                                                                        \
    return aom_variance##W##x##H##_c(temp3, W, b, b_stride, sse);       \
  }

/* Identical to the variance call except it takes an additional parameter,
 * sum, and returns that value using pass-by-reference instead of returning
 * sse - sum^2 / (w * h).
 */
#define GET_VAR(W, H)                                                         \
  void aom_get##W##x##H##var_c(const uint8_t *a, int a_stride,                \
                               const uint8_t *b, int b_stride, uint32_t *sse, \
                               int *sum) {                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, sum);                       \
  }

/* Identical to the variance call except it does not calculate
 * sse - sum^2 / (w * h); it returns sse in addition to modifying the
 * passed-in variable.
 */
#define MSE(W, H)                                               \
  uint32_t aom_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
                                const uint8_t *b, int b_stride, \
                                uint32_t *sse) {                \
    int sum;                                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);        \
    return *sse;                                                \
  }

/* All three forms of the variance are available in the same sizes. */
#define VARIANCES(W, H) \
  VAR(W, H)             \
  SUBPIX_VAR(W, H)      \
  SUBPIX_AVG_VAR(W, H)

#if CONFIG_AV1 && CONFIG_EXT_PARTITION
VARIANCES(128, 128)
VARIANCES(128, 64)
VARIANCES(64, 128)
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
VARIANCES(64, 64)
VARIANCES(64, 32)
VARIANCES(32, 64)
VARIANCES(32, 32)
VARIANCES(32, 16)
VARIANCES(16, 32)
VARIANCES(16, 16)
VARIANCES(16, 8)
VARIANCES(8, 16)
VARIANCES(8, 8)
VARIANCES(8, 4)
VARIANCES(4, 8)
VARIANCES(4, 4)
VARIANCES(4, 2)
VARIANCES(2, 4)
VARIANCES(2, 2)

GET_VAR(16, 16)
GET_VAR(8, 8)

MSE(16, 16)
MSE(16, 8)
MSE(8, 16)
MSE(8, 8)

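// Averages two predictors with rounding to nearest:
// comp_pred[j] = (pred[j] + ref[j] + 1) >> 1.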
void aom_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  int i, j;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}

// Get pred block from up-sampled reference.
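// The reference has been up-sampled by a factor of 8 in each dimension, so
// one prediction pixel is taken every 8 samples horizontally and every 8
// rows vertically (hence the stride scaled by << 3).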
void aom_upsampled_pred_c(uint8_t *comp_pred, int width, int height,
                          const uint8_t *ref, int ref_stride) {
  int i, j, k;
  int stride = ref_stride << 3;

  for (i = 0; i < height; i++) {
    for (j = 0, k = 0; j < width; j++, k += 8) {
      comp_pred[j] = ref[k];
    }
    comp_pred += width;
    ref += stride;
  }
}

void aom_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
                                   int width, int height, const uint8_t *ref,
                                   int ref_stride) {
  int i, j;
  int stride = ref_stride << 3;

  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      const int tmp = ref[(j << 3)] + pred[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += stride;
  }
}

#if CONFIG_HIGHBITDEPTH
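// High-bitdepth buffers hold uint16_t samples but travel through the same
// uint8_t * signatures as the 8-bit code; CONVERT_TO_SHORTPTR() and
// CONVERT_TO_BYTEPTR() translate between the two pointer representations.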
static void highbd_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint64_t *sse, int64_t *sum) {
  int i, j;

  uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      *sse += diff * diff;
    }
    a += a_stride;
    b += b_stride;
  }
}

uint64_t aom_highbd_sse_odd_size(const uint8_t *a, int a_stride,
                                 const uint8_t *b, int b_stride, int w, int h) {
  uint64_t sse;
  int64_t sum;
  highbd_variance64(a, a_stride, b, b_stride, w, h, &sse, &sum);
  return sse;
}

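// The 8/10/12-bit wrappers below renormalize the accumulated statistics to
// the 8-bit scale: 10-bit samples span 4x the 8-bit range, so sum shrinks by
// 2 bits and sse (a squared quantity) by 4; 12-bit samples span 16x, so sum
// shrinks by 4 bits and sse by 8.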
static void highbd_8_variance(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)sse_long;
  *sum = (int)sum_long;
}

static void highbd_10_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
}

static void highbd_12_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
}

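// In the 10- and 12-bit kernels below, the rounding applied during
// renormalization can leave sse marginally smaller than sum^2 / N, so the
// variance is clamped at zero instead of wrapping around.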
#define HIGHBD_VAR(W, H)                                                      \
  uint32_t aom_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                              const uint8_t *b, int b_stride, \
                                              uint32_t *sse) {                \
    int sum;                                                                  \
    highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));                 \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_10_variance##W##x##H##_c(                               \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,         \
      uint32_t *sse) {                                                        \
    int sum;                                                                  \
    int64_t var;                                                              \
    highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum);            \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                 \
    return (var >= 0) ? (uint32_t)var : 0;                                    \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_12_variance##W##x##H##_c(                               \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,         \
      uint32_t *sse) {                                                        \
    int sum;                                                                  \
    int64_t var;                                                              \
    highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum);            \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                 \
    return (var >= 0) ? (uint32_t)var : 0;                                    \
  }

#define HIGHBD_GET_VAR(S)                                                     \
  void aom_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride,   \
                                        const uint8_t *ref, int ref_stride,   \
                                        uint32_t *sse, int *sum) {            \
    highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);      \
  }                                                                           \
                                                                              \
  void aom_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride,  \
                                         const uint8_t *ref, int ref_stride,  \
                                         uint32_t *sse, int *sum) {           \
    highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);     \
  }                                                                           \
                                                                              \
  void aom_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride,  \
                                         const uint8_t *ref, int ref_stride,  \
                                         uint32_t *sse, int *sum) {           \
    highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);     \
  }

#define HIGHBD_MSE(W, H)                                                      \
  uint32_t aom_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride,  \
                                         const uint8_t *ref, int ref_stride,  \
                                         uint32_t *sse) {                     \
    int sum;                                                                  \
    highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);     \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }

void aom_highbd_var_filter_block2d_bil_first_pass(
    const uint8_t *src_ptr8, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, int pixel_step,
    unsigned int output_height, unsigned int output_width,
    const uint8_t *filter) {
  unsigned int i, j;
  uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);

      ++src_ptr;
    }

    // Next row...
    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

void aom_highbd_var_filter_block2d_bil_second_pass(
    const uint16_t *src_ptr, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, unsigned int pixel_step,
    unsigned int output_height, unsigned int output_width,
    const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);
      ++src_ptr;
    }

    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

#define HIGHBD_SUBPIX_VAR(W, H)                                              \
  uint32_t aom_highbd_8_sub_pixel_variance##W##x##H##_c(                     \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W,  \
                                              dst, dst_stride, sse);         \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_10_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               dst, dst_stride, sse);        \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_12_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               dst, dst_stride, sse);        \
  }

#define HIGHBD_SUBPIX_AVG_VAR(W, H)                                          \
  uint32_t aom_highbd_8_sub_pixel_avg_variance##W##x##H##_c(                 \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_avg_pred_c(temp3, second_pred, W, H,                     \
                               CONVERT_TO_BYTEPTR(temp2), W);                \
                                                                             \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,  \
                                              dst, dst_stride, sse);         \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_10_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_avg_pred_c(temp3, second_pred, W, H,                     \
                               CONVERT_TO_BYTEPTR(temp2), W);                \
                                                                             \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               dst, dst_stride, sse);        \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_12_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_avg_pred_c(temp3, second_pred, W, H,                     \
                               CONVERT_TO_BYTEPTR(temp2), W);                \
                                                                             \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               dst, dst_stride, sse);        \
  }

/* All three forms of the variance are available in the same sizes. */
#define HIGHBD_VARIANCES(W, H) \
  HIGHBD_VAR(W, H)             \
  HIGHBD_SUBPIX_VAR(W, H)      \
  HIGHBD_SUBPIX_AVG_VAR(W, H)

#if CONFIG_AV1 && CONFIG_EXT_PARTITION
HIGHBD_VARIANCES(128, 128)
HIGHBD_VARIANCES(128, 64)
HIGHBD_VARIANCES(64, 128)
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
HIGHBD_VARIANCES(64, 64)
HIGHBD_VARIANCES(64, 32)
HIGHBD_VARIANCES(32, 64)
HIGHBD_VARIANCES(32, 32)
HIGHBD_VARIANCES(32, 16)
HIGHBD_VARIANCES(16, 32)
HIGHBD_VARIANCES(16, 16)
HIGHBD_VARIANCES(16, 8)
HIGHBD_VARIANCES(8, 16)
HIGHBD_VARIANCES(8, 8)
HIGHBD_VARIANCES(8, 4)
HIGHBD_VARIANCES(4, 8)
HIGHBD_VARIANCES(4, 4)
HIGHBD_VARIANCES(4, 2)
HIGHBD_VARIANCES(2, 4)
HIGHBD_VARIANCES(2, 2)

HIGHBD_GET_VAR(8)
HIGHBD_GET_VAR(16)

HIGHBD_MSE(16, 16)
HIGHBD_MSE(16, 8)
HIGHBD_MSE(8, 16)
HIGHBD_MSE(8, 8)

void aom_highbd_comp_avg_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
                                int width, int height, const uint8_t *ref8,
                                int ref_stride) {
  int i, j;
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}

void aom_highbd_upsampled_pred_c(uint16_t *comp_pred, int width, int height,
                                 const uint8_t *ref8, int ref_stride) {
  int i, j;
  int stride = ref_stride << 3;

  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      comp_pred[j] = ref[(j << 3)];
    }
    comp_pred += width;
    ref += stride;
  }
}

void aom_highbd_comp_avg_upsampled_pred_c(uint16_t *comp_pred,
                                          const uint8_t *pred8, int width,
                                          int height, const uint8_t *ref8,
                                          int ref_stride) {
  int i, j;
  int stride = ref_stride << 3;

  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[(j << 3)];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += stride;
  }
}
#endif  // CONFIG_HIGHBITDEPTH

#if CONFIG_AV1 && CONFIG_EXT_INTER
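// AOM_BLEND_A64(a, v0, v1) computes the 6-bit alpha blend
// (a * v0 + (64 - a) * v1 + 32) >> 6, so a mask value of 64 selects the
// first operand entirely; invert_mask swaps which predictor receives the
// mask weight.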
void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                          int height, const uint8_t *ref, int ref_stride,
                          const uint8_t *mask, int mask_stride,
                          int invert_mask) {
  int i, j;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      if (!invert_mask)
        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j], pred[j]);
      else
        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j]);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
    mask += mask_stride;
  }
}

void aom_comp_mask_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
                                    int width, int height, const uint8_t *ref,
                                    int ref_stride, const uint8_t *mask,
                                    int mask_stride, int invert_mask) {
  int i, j;
  int stride = ref_stride << 3;

  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      if (!invert_mask)
        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[(j << 3)], pred[j]);
      else
        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[(j << 3)]);
    }
    comp_pred += width;
    pred += width;
    ref += stride;
    mask += mask_stride;
  }
}

#define MASK_SUBPIX_VAR(W, H)                                                 \
  unsigned int aom_masked_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,         \
      const uint8_t *msk, int msk_stride, int invert_mask,                    \
      unsigned int *sse) {                                                    \
    uint16_t fdata3[(H + 1) * W];                                             \
    uint8_t temp2[H * W];                                                     \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                               \
                                                                              \
    var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W,   \
                                      bilinear_filters_2t[xoffset]);          \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,             \
                                       bilinear_filters_2t[yoffset]);         \
                                                                              \
    aom_comp_mask_pred_c(temp3, second_pred, W, H, temp2, W, msk, msk_stride, \
                         invert_mask);                                        \
    return aom_variance##W##x##H##_c(temp3, W, ref, ref_stride, sse);         \
  }

MASK_SUBPIX_VAR(4, 4)
MASK_SUBPIX_VAR(4, 8)
MASK_SUBPIX_VAR(8, 4)
MASK_SUBPIX_VAR(8, 8)
MASK_SUBPIX_VAR(8, 16)
MASK_SUBPIX_VAR(16, 8)
MASK_SUBPIX_VAR(16, 16)
MASK_SUBPIX_VAR(16, 32)
MASK_SUBPIX_VAR(32, 16)
MASK_SUBPIX_VAR(32, 32)
MASK_SUBPIX_VAR(32, 64)
MASK_SUBPIX_VAR(64, 32)
MASK_SUBPIX_VAR(64, 64)
#if CONFIG_EXT_PARTITION
MASK_SUBPIX_VAR(64, 128)
MASK_SUBPIX_VAR(128, 64)
MASK_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION

#if CONFIG_HIGHBITDEPTH
void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
                                 int width, int height, const uint8_t *ref8,
                                 int ref_stride, const uint8_t *mask,
                                 int mask_stride, int invert_mask) {
  int i, j;
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      if (!invert_mask)
        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j], pred[j]);
      else
        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j]);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
    mask += mask_stride;
  }
}

void aom_highbd_comp_mask_upsampled_pred_c(uint16_t *comp_pred,
                                           const uint8_t *pred8, int width,
                                           int height, const uint8_t *ref8,
                                           int ref_stride, const uint8_t *mask,
                                           int mask_stride, int invert_mask) {
  int i, j;
  int stride = ref_stride << 3;

  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      if (!invert_mask)
        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j << 3], pred[j]);
      else
        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j << 3]);
    }
    comp_pred += width;
    pred += width;
    ref += stride;
    mask += mask_stride;
  }
}

#define HIGHBD_MASK_SUBPIX_VAR(W, H)                                         \
  unsigned int aom_highbd_8_masked_sub_pixel_variance##W##x##H##_c(          \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,        \
      const uint8_t *msk, int msk_stride, int invert_mask,                   \
      unsigned int *sse) {                                                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H,                    \
                                CONVERT_TO_BYTEPTR(temp2), W, msk,           \
                                msk_stride, invert_mask);                    \
                                                                             \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,  \
                                              ref, ref_stride, sse);         \
  }                                                                          \
                                                                             \
  unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c(         \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,        \
      const uint8_t *msk, int msk_stride, int invert_mask,                   \
      unsigned int *sse) {                                                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H,                    \
                                CONVERT_TO_BYTEPTR(temp2), W, msk,           \
                                msk_stride, invert_mask);                    \
                                                                             \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               ref, ref_stride, sse);        \
  }                                                                          \
                                                                             \
  unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c(         \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,        \
      const uint8_t *msk, int msk_stride, int invert_mask,                   \
      unsigned int *sse) {                                                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H,                    \
                                CONVERT_TO_BYTEPTR(temp2), W, msk,           \
                                msk_stride, invert_mask);                    \
                                                                             \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               ref, ref_stride, sse);        \
  }

HIGHBD_MASK_SUBPIX_VAR(4, 4)
HIGHBD_MASK_SUBPIX_VAR(4, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 8)
HIGHBD_MASK_SUBPIX_VAR(16, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 16)
HIGHBD_MASK_SUBPIX_VAR(32, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 32)
HIGHBD_MASK_SUBPIX_VAR(64, 64)
#if CONFIG_EXT_PARTITION
HIGHBD_MASK_SUBPIX_VAR(64, 128)
HIGHBD_MASK_SUBPIX_VAR(128, 64)
HIGHBD_MASK_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION
#endif  // CONFIG_HIGHBITDEPTH
#endif  // CONFIG_AV1 && CONFIG_EXT_INTER

#if CONFIG_AV1 && CONFIG_MOTION_VAR
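// The wsrc (weighted source) and mask buffers carry 12 bits of fractional
// precision, so the weighted residual is rounded back down by 12 bits before
// it is accumulated into sum and sse.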
static INLINE void obmc_variance(const uint8_t *pre, int pre_stride,
                                 const int32_t *wsrc, const int32_t *mask,
                                 int w, int h, unsigned int *sse, int *sum) {
  int i, j;

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
      *sum += diff;
      *sse += diff * diff;
    }

    pre += pre_stride;
    wsrc += w;
    mask += w;
  }
}

#define OBMC_VAR(W, H)                                            \
  unsigned int aom_obmc_variance##W##x##H##_c(                    \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,    \
      const int32_t *mask, unsigned int *sse) {                   \
    int sum;                                                      \
    obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum);  \
    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
  }

#define OBMC_SUBPIX_VAR(W, H)                                               \
  unsigned int aom_obmc_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,         \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {        \
    uint16_t fdata3[(H + 1) * W];                                           \
    uint8_t temp2[H * W];                                                   \
                                                                            \
    var_filter_block2d_bil_first_pass(pre, fdata3, pre_stride, 1, H + 1, W, \
                                      bilinear_filters_2t[xoffset]);        \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,           \
                                       bilinear_filters_2t[yoffset]);       \
                                                                            \
    return aom_obmc_variance##W##x##H##_c(temp2, W, wsrc, mask, sse);       \
  }

OBMC_VAR(4, 4)
OBMC_SUBPIX_VAR(4, 4)

OBMC_VAR(4, 8)
OBMC_SUBPIX_VAR(4, 8)

OBMC_VAR(8, 4)
OBMC_SUBPIX_VAR(8, 4)

OBMC_VAR(8, 8)
OBMC_SUBPIX_VAR(8, 8)

OBMC_VAR(8, 16)
OBMC_SUBPIX_VAR(8, 16)

OBMC_VAR(16, 8)
OBMC_SUBPIX_VAR(16, 8)

OBMC_VAR(16, 16)
OBMC_SUBPIX_VAR(16, 16)

OBMC_VAR(16, 32)
OBMC_SUBPIX_VAR(16, 32)

OBMC_VAR(32, 16)
OBMC_SUBPIX_VAR(32, 16)

OBMC_VAR(32, 32)
OBMC_SUBPIX_VAR(32, 32)

OBMC_VAR(32, 64)
OBMC_SUBPIX_VAR(32, 64)

OBMC_VAR(64, 32)
OBMC_SUBPIX_VAR(64, 32)

OBMC_VAR(64, 64)
OBMC_SUBPIX_VAR(64, 64)

#if CONFIG_EXT_PARTITION
OBMC_VAR(64, 128)
OBMC_SUBPIX_VAR(64, 128)

OBMC_VAR(128, 64)
OBMC_SUBPIX_VAR(128, 64)

OBMC_VAR(128, 128)
OBMC_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION

#if CONFIG_HIGHBITDEPTH
static INLINE void highbd_obmc_variance64(const uint8_t *pre8, int pre_stride,
                                          const int32_t *wsrc,
                                          const int32_t *mask, int w, int h,
                                          uint64_t *sse, int64_t *sum) {
  int i, j;
  uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
      *sum += diff;
      *sse += diff * diff;
    }

    pre += pre_stride;
    wsrc += w;
    mask += w;
  }
}

static INLINE void highbd_obmc_variance(const uint8_t *pre8, int pre_stride,
                                        const int32_t *wsrc,
                                        const int32_t *mask, int w, int h,
                                        unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)sum64;
  *sse = (unsigned int)sse64;
}

static INLINE void highbd_10_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
}

static INLINE void highbd_12_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
}

#define HIGHBD_OBMC_VAR(W, H)                                              \
  unsigned int aom_highbd_obmc_variance##W##x##H##_c(                      \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    highbd_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum);    \
    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H));          \
  }                                                                        \
                                                                           \
  unsigned int aom_highbd_10_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    int64_t var;                                                           \
    highbd_10_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));              \
    return (var >= 0) ? (uint32_t)var : 0;                                 \
  }                                                                        \
                                                                           \
  unsigned int aom_highbd_12_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    int64_t var;                                                           \
    highbd_12_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));              \
    return (var >= 0) ? (uint32_t)var : 0;                                 \
  }

#define HIGHBD_OBMC_SUBPIX_VAR(W, H)                                         \
  unsigned int aom_highbd_obmc_sub_pixel_variance##W##x##H##_c(              \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,          \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {         \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2),  \
                                                 W, wsrc, mask, sse);        \
  }                                                                          \
                                                                             \
  unsigned int aom_highbd_10_obmc_sub_pixel_variance##W##x##H##_c(           \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,          \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {         \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_10_obmc_variance##W##x##H##_c(                         \
        CONVERT_TO_BYTEPTR(temp2), W, wsrc, mask, sse);                      \
  }                                                                          \
                                                                             \
  unsigned int aom_highbd_12_obmc_sub_pixel_variance##W##x##H##_c(           \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,          \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {         \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_12_obmc_variance##W##x##H##_c(                         \
        CONVERT_TO_BYTEPTR(temp2), W, wsrc, mask, sse);                      \
  }

HIGHBD_OBMC_VAR(4, 4)
HIGHBD_OBMC_SUBPIX_VAR(4, 4)

HIGHBD_OBMC_VAR(4, 8)
HIGHBD_OBMC_SUBPIX_VAR(4, 8)

HIGHBD_OBMC_VAR(8, 4)
HIGHBD_OBMC_SUBPIX_VAR(8, 4)

HIGHBD_OBMC_VAR(8, 8)
HIGHBD_OBMC_SUBPIX_VAR(8, 8)

HIGHBD_OBMC_VAR(8, 16)
HIGHBD_OBMC_SUBPIX_VAR(8, 16)

HIGHBD_OBMC_VAR(16, 8)
HIGHBD_OBMC_SUBPIX_VAR(16, 8)

HIGHBD_OBMC_VAR(16, 16)
HIGHBD_OBMC_SUBPIX_VAR(16, 16)

HIGHBD_OBMC_VAR(16, 32)
HIGHBD_OBMC_SUBPIX_VAR(16, 32)

HIGHBD_OBMC_VAR(32, 16)
HIGHBD_OBMC_SUBPIX_VAR(32, 16)

HIGHBD_OBMC_VAR(32, 32)
HIGHBD_OBMC_SUBPIX_VAR(32, 32)

HIGHBD_OBMC_VAR(32, 64)
HIGHBD_OBMC_SUBPIX_VAR(32, 64)

HIGHBD_OBMC_VAR(64, 32)
HIGHBD_OBMC_SUBPIX_VAR(64, 32)

HIGHBD_OBMC_VAR(64, 64)
HIGHBD_OBMC_SUBPIX_VAR(64, 64)

#if CONFIG_EXT_PARTITION
HIGHBD_OBMC_VAR(64, 128)
HIGHBD_OBMC_SUBPIX_VAR(64, 128)

HIGHBD_OBMC_VAR(128, 64)
HIGHBD_OBMC_SUBPIX_VAR(128, 64)

HIGHBD_OBMC_VAR(128, 128)
HIGHBD_OBMC_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION
#endif  // CONFIG_HIGHBITDEPTH
#endif  // CONFIG_AV1 && CONFIG_MOTION_VAR