/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
#include <stdlib.h>

#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"

#include "aom_ports/mem.h"
#include "aom/aom_integer.h"

#include "aom_dsp/variance.h"
#include "aom_dsp/aom_filter.h"
#include "aom_dsp/blend.h"

uint32_t aom_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
                            int b_stride) {
  int distortion = 0;
  int r, c;

  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) {
      int diff = a[c] - b[c];
      distortion += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }

  return distortion;
}

uint32_t aom_get_mb_ss_c(const int16_t *a) {
  unsigned int i, sum = 0;

  for (i = 0; i < 256; ++i) {
    sum += a[i] * a[i];
  }

  return sum;
}

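// bilinear_filters_2t[] is indexed in 1/8-pel steps, so an offset of 4
// selects the half-pel taps {64, 64}. These helpers evaluate the horizontal,
// vertical, and diagonal half-pixel positions of a 16x16 block.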
uint32_t aom_variance_halfpixvar16x16_h_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 4, 0, b, b_stride, sse);
}

uint32_t aom_variance_halfpixvar16x16_v_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 0, 4, b, b_stride, sse);
}

uint32_t aom_variance_halfpixvar16x16_hv_c(const uint8_t *a, int a_stride,
                                           const uint8_t *b, int b_stride,
                                           uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 4, 4, b, b_stride, sse);
}

static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
                     int b_stride, int w, int h, uint32_t *sse, int *sum) {
  int i, j;

  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      *sse += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }
}

uint32_t aom_sse_odd_size(const uint8_t *a, int a_stride, const uint8_t *b,
                          int b_stride, int w, int h) {
  uint32_t sse;
  int sum;
  variance(a, a_stride, b, b_stride, w, h, &sse, &sum);
  return sse;
}

// Applies a 1-D 2-tap bilinear filter to the source block in either the
// horizontal or vertical direction to produce the filtered output block. Used
// to implement the first pass of the 2-D separable filter.
//
// Produces uint16_t output to retain precision for the next pass. The two
// filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
// filter is applied horizontally (pixel_step = 1) or vertically
// (pixel_step = stride). It defines the offset required to move from one
// input to the next.
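// For example, with FILTER_BITS == 7 the half-pel taps {64, 64} average two
// neighboring inputs with rounding: a[0] == 10 and a[pixel_step] == 20 give
// ROUND_POWER_OF_TWO(10 * 64 + 20 * 64, 7) == 15.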
static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b,
                                              unsigned int src_pixels_per_line,
                                              int pixel_step,
                                              unsigned int output_height,
                                              unsigned int output_width,
                                              const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);

      ++a;
    }

    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}

// Applies a 1-D 2-tap bilinear filter to the source block in either the
// horizontal or vertical direction to produce the filtered output block. Used
// to implement the second pass of the 2-D separable filter.
//
// Requires 16-bit input as produced by var_filter_block2d_bil_first_pass. The
// two filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
// filter is applied horizontally (pixel_step = 1) or vertically
// (pixel_step = stride). It defines the offset required to move from one input
// to the next. Output is 8-bit.
static void var_filter_block2d_bil_second_pass(const uint16_t *a, uint8_t *b,
                                               unsigned int src_pixels_per_line,
                                               unsigned int pixel_step,
                                               unsigned int output_height,
                                               unsigned int output_width,
                                               const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
      ++a;
    }

    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}

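// The VAR kernels compute the variance of an N = W * H block as
// sse - sum^2 / N (the identity E[X^2] - E[X]^2 scaled by N). For example,
// an 8x8 block where every diff is 2 gives sse = 256 and sum = 128, so the
// variance is 256 - (128 * 128) / 64 = 0, as expected for a constant offset.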
#define VAR(W, H)                                                    \
  uint32_t aom_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                     const uint8_t *b, int b_stride, \
                                     uint32_t *sse) {                \
    int sum;                                                         \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));        \
  }

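// The sub-pixel kernels apply the 2-tap bilinear filter as two separable 1-D
// passes. The intermediate buffer holds H + 1 rows because the vertical
// second pass reads one row past the output height for its second tap.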
#define SUBPIX_VAR(W, H)                                                \
  uint32_t aom_sub_pixel_variance##W##x##H##_c(                         \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,         \
      const uint8_t *b, int b_stride, uint32_t *sse) {                  \
    uint16_t fdata3[(H + 1) * W];                                       \
    uint8_t temp2[H * W];                                               \
                                                                        \
    var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
                                      bilinear_filters_2t[xoffset]);    \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
                                       bilinear_filters_2t[yoffset]);   \
                                                                        \
    return aom_variance##W##x##H##_c(temp2, W, b, b_stride, sse);       \
  }

#define SUBPIX_AVG_VAR(W, H)                                            \
  uint32_t aom_sub_pixel_avg_variance##W##x##H##_c(                     \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,         \
      const uint8_t *b, int b_stride, uint32_t *sse,                    \
      const uint8_t *second_pred) {                                     \
    uint16_t fdata3[(H + 1) * W];                                       \
    uint8_t temp2[H * W];                                               \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                         \
                                                                        \
    var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
                                      bilinear_filters_2t[xoffset]);    \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
                                       bilinear_filters_2t[yoffset]);   \
                                                                        \
    aom_comp_avg_pred(temp3, second_pred, W, H, temp2, W);              \
                                                                        \
    return aom_variance##W##x##H##_c(temp3, W, b, b_stride, sse);       \
  }

/* Identical to the variance call except it takes an additional parameter,
 * sum, and returns that value using pass-by-reference instead of returning
 * sse - sum^2 / (w * h).
 */
#define GET_VAR(W, H)                                                         \
  void aom_get##W##x##H##var_c(const uint8_t *a, int a_stride,                \
                               const uint8_t *b, int b_stride, uint32_t *sse, \
                               int *sum) {                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, sum);                       \
  }

/* Identical to the variance call except it does not calculate
 * sse - sum^2 / (w * h); it returns sse in addition to modifying the
 * passed-in variable.
 */
#define MSE(W, H)                                               \
  uint32_t aom_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
                                const uint8_t *b, int b_stride, \
                                uint32_t *sse) {                \
    int sum;                                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);        \
    return *sse;                                                \
  }

/* All three forms of the variance are available in the same sizes. */
#define VARIANCES(W, H) \
  VAR(W, H)             \
  SUBPIX_VAR(W, H)      \
  SUBPIX_AVG_VAR(W, H)

#if CONFIG_AV1 && CONFIG_EXT_PARTITION
VARIANCES(128, 128)
VARIANCES(128, 64)
VARIANCES(64, 128)
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
VARIANCES(64, 64)
VARIANCES(64, 32)
VARIANCES(32, 64)
VARIANCES(32, 32)
VARIANCES(32, 16)
VARIANCES(16, 32)
VARIANCES(16, 16)
VARIANCES(16, 8)
VARIANCES(8, 16)
VARIANCES(8, 8)
VARIANCES(8, 4)
VARIANCES(4, 8)
VARIANCES(4, 4)
VARIANCES(4, 2)
VARIANCES(2, 4)
VARIANCES(2, 2)

GET_VAR(16, 16)
GET_VAR(8, 8)

MSE(16, 16)
MSE(16, 8)
MSE(8, 16)
MSE(8, 8)

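// Averages two predictors with rounding to nearest:
// comp_pred[j] = (pred[j] + ref[j] + 1) >> 1.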
void aom_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  int i, j;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}

// Get pred block from up-sampled reference.
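// The reference has been up-sampled by a factor of 8 in each dimension, so
// one prediction pixel is taken every 8 samples horizontally and every 8
// rows vertically (hence the stride scaled by << 3).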
void aom_upsampled_pred_c(uint8_t *comp_pred, int width, int height,
                          const uint8_t *ref, int ref_stride) {
  int i, j, k;
  int stride = ref_stride << 3;

  for (i = 0; i < height; i++) {
    for (j = 0, k = 0; j < width; j++, k += 8) {
      comp_pred[j] = ref[k];
    }
    comp_pred += width;
    ref += stride;
  }
}

void aom_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
                                   int width, int height, const uint8_t *ref,
                                   int ref_stride) {
  int i, j;
  int stride = ref_stride << 3;

  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      const int tmp = ref[(j << 3)] + pred[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += stride;
  }
}

#if CONFIG_HIGHBITDEPTH
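// High-bitdepth buffers hold uint16_t samples but travel through the same
// uint8_t * signatures as the 8-bit code; CONVERT_TO_SHORTPTR() and
// CONVERT_TO_BYTEPTR() translate between the two pointer representations.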
static void highbd_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint64_t *sse, int64_t *sum) {
  int i, j;

  uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      *sse += diff * diff;
    }
    a += a_stride;
    b += b_stride;
  }
}

uint64_t aom_highbd_sse_odd_size(const uint8_t *a, int a_stride,
                                 const uint8_t *b, int b_stride, int w, int h) {
  uint64_t sse;
  int64_t sum;
  highbd_variance64(a, a_stride, b, b_stride, w, h, &sse, &sum);
  return sse;
}

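// The 8/10/12-bit wrappers below renormalize the accumulated statistics to
// the 8-bit scale: 10-bit samples span 4x the 8-bit range, so sum shrinks by
// 2 bits and sse (a squared quantity) by 4; 12-bit samples span 16x, so sum
// shrinks by 4 bits and sse by 8.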
static void highbd_8_variance(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)sse_long;
  *sum = (int)sum_long;
}

static void highbd_10_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
}

static void highbd_12_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
}

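// In the 10- and 12-bit kernels below, the rounding applied during
// renormalization can leave sse marginally smaller than sum^2 / N, so the
// variance is clamped at zero instead of wrapping around.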
#define HIGHBD_VAR(W, H)                                                      \
  uint32_t aom_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                              const uint8_t *b, int b_stride, \
                                              uint32_t *sse) {                \
    int sum;                                                                  \
    highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));                 \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_10_variance##W##x##H##_c(                               \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,         \
      uint32_t *sse) {                                                        \
    int sum;                                                                  \
    int64_t var;                                                              \
    highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum);            \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                 \
    return (var >= 0) ? (uint32_t)var : 0;                                    \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_12_variance##W##x##H##_c(                               \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,         \
      uint32_t *sse) {                                                        \
    int sum;                                                                  \
    int64_t var;                                                              \
    highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum);            \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                 \
    return (var >= 0) ? (uint32_t)var : 0;                                    \
  }

#define HIGHBD_GET_VAR(S)                                                     \
  void aom_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride,   \
                                        const uint8_t *ref, int ref_stride,   \
                                        uint32_t *sse, int *sum) {            \
    highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);      \
  }                                                                           \
                                                                              \
  void aom_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride,  \
                                         const uint8_t *ref, int ref_stride,  \
                                         uint32_t *sse, int *sum) {           \
    highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);     \
  }                                                                           \
                                                                              \
  void aom_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride,  \
                                         const uint8_t *ref, int ref_stride,  \
                                         uint32_t *sse, int *sum) {           \
    highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);     \
  }

#define HIGHBD_MSE(W, H)                                                      \
  uint32_t aom_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride,  \
                                         const uint8_t *ref, int ref_stride,  \
                                         uint32_t *sse) {                     \
    int sum;                                                                  \
    highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);     \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }

void aom_highbd_var_filter_block2d_bil_first_pass(
    const uint8_t *src_ptr8, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, int pixel_step,
    unsigned int output_height, unsigned int output_width,
    const uint8_t *filter) {
  unsigned int i, j;
  uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);

      ++src_ptr;
    }

    // Next row...
    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

void aom_highbd_var_filter_block2d_bil_second_pass(
    const uint16_t *src_ptr, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, unsigned int pixel_step,
    unsigned int output_height, unsigned int output_width,
    const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);
      ++src_ptr;
    }

    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

#define HIGHBD_SUBPIX_VAR(W, H)                                              \
  uint32_t aom_highbd_8_sub_pixel_variance##W##x##H##_c(                     \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W,  \
                                              dst, dst_stride, sse);         \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_10_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               dst, dst_stride, sse);        \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_12_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               dst, dst_stride, sse);        \
  }

#define HIGHBD_SUBPIX_AVG_VAR(W, H)                                          \
  uint32_t aom_highbd_8_sub_pixel_avg_variance##W##x##H##_c(                 \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_avg_pred_c(temp3, second_pred, W, H,                     \
                               CONVERT_TO_BYTEPTR(temp2), W);                \
                                                                             \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,  \
                                              dst, dst_stride, sse);         \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_10_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_avg_pred_c(temp3, second_pred, W, H,                     \
                               CONVERT_TO_BYTEPTR(temp2), W);                \
                                                                             \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               dst, dst_stride, sse);        \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_12_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_avg_pred_c(temp3, second_pred, W, H,                     \
                               CONVERT_TO_BYTEPTR(temp2), W);                \
                                                                             \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               dst, dst_stride, sse);        \
  }

/* All three forms of the variance are available in the same sizes. */
#define HIGHBD_VARIANCES(W, H) \
  HIGHBD_VAR(W, H)             \
  HIGHBD_SUBPIX_VAR(W, H)      \
  HIGHBD_SUBPIX_AVG_VAR(W, H)

#if CONFIG_AV1 && CONFIG_EXT_PARTITION
HIGHBD_VARIANCES(128, 128)
HIGHBD_VARIANCES(128, 64)
HIGHBD_VARIANCES(64, 128)
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
HIGHBD_VARIANCES(64, 64)
HIGHBD_VARIANCES(64, 32)
HIGHBD_VARIANCES(32, 64)
HIGHBD_VARIANCES(32, 32)
HIGHBD_VARIANCES(32, 16)
HIGHBD_VARIANCES(16, 32)
HIGHBD_VARIANCES(16, 16)
HIGHBD_VARIANCES(16, 8)
HIGHBD_VARIANCES(8, 16)
HIGHBD_VARIANCES(8, 8)
HIGHBD_VARIANCES(8, 4)
HIGHBD_VARIANCES(4, 8)
HIGHBD_VARIANCES(4, 4)
HIGHBD_VARIANCES(4, 2)
HIGHBD_VARIANCES(2, 4)
HIGHBD_VARIANCES(2, 2)

HIGHBD_GET_VAR(8)
HIGHBD_GET_VAR(16)

HIGHBD_MSE(16, 16)
HIGHBD_MSE(16, 8)
HIGHBD_MSE(8, 16)
HIGHBD_MSE(8, 8)

void aom_highbd_comp_avg_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
                                int width, int height, const uint8_t *ref8,
                                int ref_stride) {
  int i, j;
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}

void aom_highbd_upsampled_pred_c(uint16_t *comp_pred, int width, int height,
                                 const uint8_t *ref8, int ref_stride) {
  int i, j;
  int stride = ref_stride << 3;

  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      comp_pred[j] = ref[(j << 3)];
    }
    comp_pred += width;
    ref += stride;
  }
}

void aom_highbd_comp_avg_upsampled_pred_c(uint16_t *comp_pred,
                                          const uint8_t *pred8, int width,
                                          int height, const uint8_t *ref8,
                                          int ref_stride) {
  int i, j;
  int stride = ref_stride << 3;

  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[(j << 3)];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += stride;
  }
}
#endif  // CONFIG_HIGHBITDEPTH

#if CONFIG_AV1 && CONFIG_EXT_INTER
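// AOM_BLEND_A64(a, v0, v1) computes the 6-bit alpha blend
// (a * v0 + (64 - a) * v1 + 32) >> 6, so a mask value of 64 selects the
// first operand entirely; invert_mask swaps which predictor receives the
// mask weight.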
void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                          int height, const uint8_t *ref, int ref_stride,
                          const uint8_t *mask, int mask_stride,
                          int invert_mask) {
  int i, j;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      if (!invert_mask)
        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j], pred[j]);
      else
        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j]);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
    mask += mask_stride;
  }
}

void aom_comp_mask_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
                                    int width, int height, const uint8_t *ref,
                                    int ref_stride, const uint8_t *mask,
                                    int mask_stride, int invert_mask) {
  int i, j;
  int stride = ref_stride << 3;

  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      if (!invert_mask)
        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[(j << 3)], pred[j]);
      else
        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[(j << 3)]);
    }
    comp_pred += width;
    pred += width;
    ref += stride;
    mask += mask_stride;
  }
}

#define MASK_SUBPIX_VAR(W, H)                                                 \
  unsigned int aom_masked_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,         \
      const uint8_t *msk, int msk_stride, int invert_mask,                    \
      unsigned int *sse) {                                                    \
    uint16_t fdata3[(H + 1) * W];                                             \
    uint8_t temp2[H * W];                                                     \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                               \
                                                                              \
    var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W,   \
                                      bilinear_filters_2t[xoffset]);          \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,             \
                                       bilinear_filters_2t[yoffset]);         \
                                                                              \
    aom_comp_mask_pred_c(temp3, second_pred, W, H, temp2, W, msk, msk_stride, \
                         invert_mask);                                        \
    return aom_variance##W##x##H##_c(temp3, W, ref, ref_stride, sse);         \
  }

MASK_SUBPIX_VAR(4, 4)
MASK_SUBPIX_VAR(4, 8)
MASK_SUBPIX_VAR(8, 4)
MASK_SUBPIX_VAR(8, 8)
MASK_SUBPIX_VAR(8, 16)
MASK_SUBPIX_VAR(16, 8)
MASK_SUBPIX_VAR(16, 16)
MASK_SUBPIX_VAR(16, 32)
MASK_SUBPIX_VAR(32, 16)
MASK_SUBPIX_VAR(32, 32)
MASK_SUBPIX_VAR(32, 64)
MASK_SUBPIX_VAR(64, 32)
MASK_SUBPIX_VAR(64, 64)
#if CONFIG_EXT_PARTITION
MASK_SUBPIX_VAR(64, 128)
MASK_SUBPIX_VAR(128, 64)
MASK_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION

#if CONFIG_HIGHBITDEPTH
void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
                                 int width, int height, const uint8_t *ref8,
                                 int ref_stride, const uint8_t *mask,
                                 int mask_stride, int invert_mask) {
  int i, j;
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      if (!invert_mask)
        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j], pred[j]);
      else
        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j]);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
    mask += mask_stride;
  }
}

void aom_highbd_comp_mask_upsampled_pred_c(uint16_t *comp_pred,
                                           const uint8_t *pred8, int width,
                                           int height, const uint8_t *ref8,
                                           int ref_stride, const uint8_t *mask,
                                           int mask_stride, int invert_mask) {
  int i, j;
  int stride = ref_stride << 3;

  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      if (!invert_mask)
        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j << 3], pred[j]);
      else
        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j << 3]);
    }
    comp_pred += width;
    pred += width;
    ref += stride;
    mask += mask_stride;
  }
}

#define HIGHBD_MASK_SUBPIX_VAR(W, H)                                         \
  unsigned int aom_highbd_8_masked_sub_pixel_variance##W##x##H##_c(          \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,        \
      const uint8_t *msk, int msk_stride, int invert_mask,                   \
      unsigned int *sse) {                                                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H,                    \
                                CONVERT_TO_BYTEPTR(temp2), W, msk,           \
                                msk_stride, invert_mask);                    \
                                                                             \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,  \
                                              ref, ref_stride, sse);         \
  }                                                                          \
                                                                             \
  unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c(         \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,        \
      const uint8_t *msk, int msk_stride, int invert_mask,                   \
      unsigned int *sse) {                                                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H,                    \
                                CONVERT_TO_BYTEPTR(temp2), W, msk,           \
                                msk_stride, invert_mask);                    \
                                                                             \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               ref, ref_stride, sse);        \
  }                                                                          \
                                                                             \
  unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c(         \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,        \
      const uint8_t *msk, int msk_stride, int invert_mask,                   \
      unsigned int *sse) {                                                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H,                    \
                                CONVERT_TO_BYTEPTR(temp2), W, msk,           \
                                msk_stride, invert_mask);                    \
                                                                             \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               ref, ref_stride, sse);        \
  }

HIGHBD_MASK_SUBPIX_VAR(4, 4)
HIGHBD_MASK_SUBPIX_VAR(4, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 8)
HIGHBD_MASK_SUBPIX_VAR(16, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 16)
HIGHBD_MASK_SUBPIX_VAR(32, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 32)
HIGHBD_MASK_SUBPIX_VAR(64, 64)
#if CONFIG_EXT_PARTITION
HIGHBD_MASK_SUBPIX_VAR(64, 128)
HIGHBD_MASK_SUBPIX_VAR(128, 64)
HIGHBD_MASK_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION
#endif  // CONFIG_HIGHBITDEPTH
#endif  // CONFIG_AV1 && CONFIG_EXT_INTER

#if CONFIG_AV1 && CONFIG_MOTION_VAR
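// The wsrc (weighted source) and mask buffers carry 12 bits of fractional
// precision, so the weighted residual is rounded back down by 12 bits before
// it is accumulated into sum and sse.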
static INLINE void obmc_variance(const uint8_t *pre, int pre_stride,
                                 const int32_t *wsrc, const int32_t *mask,
                                 int w, int h, unsigned int *sse, int *sum) {
  int i, j;

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
      *sum += diff;
      *sse += diff * diff;
    }

    pre += pre_stride;
    wsrc += w;
    mask += w;
  }
}

#define OBMC_VAR(W, H)                                            \
  unsigned int aom_obmc_variance##W##x##H##_c(                    \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,    \
      const int32_t *mask, unsigned int *sse) {                   \
    int sum;                                                      \
    obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum);  \
    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
  }

#define OBMC_SUBPIX_VAR(W, H)                                               \
  unsigned int aom_obmc_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,         \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {        \
    uint16_t fdata3[(H + 1) * W];                                           \
    uint8_t temp2[H * W];                                                   \
                                                                            \
    var_filter_block2d_bil_first_pass(pre, fdata3, pre_stride, 1, H + 1, W, \
                                      bilinear_filters_2t[xoffset]);        \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,           \
                                       bilinear_filters_2t[yoffset]);       \
                                                                            \
    return aom_obmc_variance##W##x##H##_c(temp2, W, wsrc, mask, sse);       \
  }

OBMC_VAR(4, 4)
OBMC_SUBPIX_VAR(4, 4)

OBMC_VAR(4, 8)
OBMC_SUBPIX_VAR(4, 8)

OBMC_VAR(8, 4)
OBMC_SUBPIX_VAR(8, 4)

OBMC_VAR(8, 8)
OBMC_SUBPIX_VAR(8, 8)

OBMC_VAR(8, 16)
OBMC_SUBPIX_VAR(8, 16)

OBMC_VAR(16, 8)
OBMC_SUBPIX_VAR(16, 8)

OBMC_VAR(16, 16)
OBMC_SUBPIX_VAR(16, 16)

OBMC_VAR(16, 32)
OBMC_SUBPIX_VAR(16, 32)

OBMC_VAR(32, 16)
OBMC_SUBPIX_VAR(32, 16)

OBMC_VAR(32, 32)
OBMC_SUBPIX_VAR(32, 32)

OBMC_VAR(32, 64)
OBMC_SUBPIX_VAR(32, 64)

OBMC_VAR(64, 32)
OBMC_SUBPIX_VAR(64, 32)

OBMC_VAR(64, 64)
OBMC_SUBPIX_VAR(64, 64)

#if CONFIG_EXT_PARTITION
OBMC_VAR(64, 128)
OBMC_SUBPIX_VAR(64, 128)

OBMC_VAR(128, 64)
OBMC_SUBPIX_VAR(128, 64)

OBMC_VAR(128, 128)
OBMC_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION

#if CONFIG_HIGHBITDEPTH
static INLINE void highbd_obmc_variance64(const uint8_t *pre8, int pre_stride,
                                          const int32_t *wsrc,
                                          const int32_t *mask, int w, int h,
                                          uint64_t *sse, int64_t *sum) {
  int i, j;
  uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
      *sum += diff;
      *sse += diff * diff;
    }

    pre += pre_stride;
    wsrc += w;
    mask += w;
  }
}

static INLINE void highbd_obmc_variance(const uint8_t *pre8, int pre_stride,
                                        const int32_t *wsrc,
                                        const int32_t *mask, int w, int h,
                                        unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)sum64;
  *sse = (unsigned int)sse64;
}

static INLINE void highbd_10_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
}

static INLINE void highbd_12_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
}

#define HIGHBD_OBMC_VAR(W, H)                                              \
  unsigned int aom_highbd_obmc_variance##W##x##H##_c(                      \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    highbd_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum);    \
    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H));          \
  }                                                                        \
                                                                           \
  unsigned int aom_highbd_10_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    int64_t var;                                                           \
    highbd_10_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));              \
    return (var >= 0) ? (uint32_t)var : 0;                                 \
  }                                                                        \
                                                                           \
  unsigned int aom_highbd_12_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    int64_t var;                                                           \
    highbd_12_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));              \
    return (var >= 0) ? (uint32_t)var : 0;                                 \
  }

#define HIGHBD_OBMC_SUBPIX_VAR(W, H)                                         \
  unsigned int aom_highbd_obmc_sub_pixel_variance##W##x##H##_c(              \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,          \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {         \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2),  \
                                                 W, wsrc, mask, sse);        \
  }                                                                          \
                                                                             \
  unsigned int aom_highbd_10_obmc_sub_pixel_variance##W##x##H##_c(           \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,          \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {         \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_10_obmc_variance##W##x##H##_c(                         \
        CONVERT_TO_BYTEPTR(temp2), W, wsrc, mask, sse);                      \
  }                                                                          \
                                                                             \
  unsigned int aom_highbd_12_obmc_sub_pixel_variance##W##x##H##_c(           \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,          \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {         \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_12_obmc_variance##W##x##H##_c(                         \
        CONVERT_TO_BYTEPTR(temp2), W, wsrc, mask, sse);                      \
  }

HIGHBD_OBMC_VAR(4, 4)
HIGHBD_OBMC_SUBPIX_VAR(4, 4)

HIGHBD_OBMC_VAR(4, 8)
HIGHBD_OBMC_SUBPIX_VAR(4, 8)

HIGHBD_OBMC_VAR(8, 4)
HIGHBD_OBMC_SUBPIX_VAR(8, 4)

HIGHBD_OBMC_VAR(8, 8)
HIGHBD_OBMC_SUBPIX_VAR(8, 8)

HIGHBD_OBMC_VAR(8, 16)
HIGHBD_OBMC_SUBPIX_VAR(8, 16)

HIGHBD_OBMC_VAR(16, 8)
HIGHBD_OBMC_SUBPIX_VAR(16, 8)

HIGHBD_OBMC_VAR(16, 16)
HIGHBD_OBMC_SUBPIX_VAR(16, 16)

HIGHBD_OBMC_VAR(16, 32)
HIGHBD_OBMC_SUBPIX_VAR(16, 32)

HIGHBD_OBMC_VAR(32, 16)
HIGHBD_OBMC_SUBPIX_VAR(32, 16)

HIGHBD_OBMC_VAR(32, 32)
HIGHBD_OBMC_SUBPIX_VAR(32, 32)

HIGHBD_OBMC_VAR(32, 64)
HIGHBD_OBMC_SUBPIX_VAR(32, 64)

HIGHBD_OBMC_VAR(64, 32)
HIGHBD_OBMC_SUBPIX_VAR(64, 32)

HIGHBD_OBMC_VAR(64, 64)
HIGHBD_OBMC_SUBPIX_VAR(64, 64)

#if CONFIG_EXT_PARTITION
HIGHBD_OBMC_VAR(64, 128)
HIGHBD_OBMC_SUBPIX_VAR(64, 128)

HIGHBD_OBMC_VAR(128, 64)
HIGHBD_OBMC_SUBPIX_VAR(128, 64)

HIGHBD_OBMC_VAR(128, 128)
HIGHBD_OBMC_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION
#endif  // CONFIG_HIGHBITDEPTH
#endif  // CONFIG_AV1 && CONFIG_MOTION_VAR