/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <emmintrin.h>  // SSE2

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx_ports/mem.h"

typedef void (*getNxMvar_fn_t)(const unsigned char *src, int src_stride,
                               const unsigned char *ref, int ref_stride,
                               unsigned int *sse, int *sum);
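
/* Each getNxMvar helper writes the raw sum of squared differences (sse) and
 * the signed sum of differences (sum) for one fixed-size block;
 * variance_sse2() further down tiles larger rectangles out of these
 * per-block results. */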

unsigned int vpx_get_mb_ss_sse2(const int16_t *src) {
  __m128i vsum = _mm_setzero_si128();
  int i;

  for (i = 0; i < 32; ++i) {
    const __m128i v = _mm_loadu_si128((const __m128i *)src);
    vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v));
    src += 8;
  }

  vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
  vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
  return _mm_cvtsi128_si32(vsum);
}
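
/* A scalar sketch of what the SIMD loop above computes: the sum of squares
 * over 256 int16_t values (32 loads of 8 lanes each), i.e. one 16x16
 * macroblock of residuals. This reference function is for illustration only
 * and is not part of libvpx. */
static unsigned int get_mb_ss_ref(const int16_t *src) {
  unsigned int ss = 0;
  int i;
  for (i = 0; i < 256; ++i) ss += (unsigned int)(src[i] * src[i]);
  return ss;
}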

#define READ64(p, stride, i)                                  \
  _mm_unpacklo_epi8(                                          \
      _mm_cvtsi32_si128(*(const uint32_t *)(p + i * stride)), \
      _mm_cvtsi32_si128(*(const uint32_t *)(p + (i + 1) * stride)))
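
/* READ64 loads rows i and i + 1 as two 32-bit words and interleaves their
 * bytes into the low half of an xmm register. The interleaved row order is
 * harmless here because the caller only takes sums and sums of squares. */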

static void get4x4var_sse2(const uint8_t *src, int src_stride,
                           const uint8_t *ref, int ref_stride,
                           unsigned int *sse, int *sum) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i src0 = _mm_unpacklo_epi8(READ64(src, src_stride, 0), zero);
  const __m128i src1 = _mm_unpacklo_epi8(READ64(src, src_stride, 2), zero);
  const __m128i ref0 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 0), zero);
  const __m128i ref1 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 2), zero);
  const __m128i diff0 = _mm_sub_epi16(src0, ref0);
  const __m128i diff1 = _mm_sub_epi16(src1, ref1);

  // sum
  __m128i vsum = _mm_add_epi16(diff0, diff1);
  vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
  vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4));
  vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2));
  *sum = (int16_t)_mm_extract_epi16(vsum, 0);

  // sse
  vsum =
      _mm_add_epi32(_mm_madd_epi16(diff0, diff0), _mm_madd_epi16(diff1, diff1));
  vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
  vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
  *sse = _mm_cvtsi128_si32(vsum);
}

void vpx_get8x8var_sse2(const uint8_t *src, int src_stride, const uint8_t *ref,
                        int ref_stride, unsigned int *sse, int *sum) {
  const __m128i zero = _mm_setzero_si128();
  __m128i vsum = _mm_setzero_si128();
  __m128i vsse = _mm_setzero_si128();
  int i;

  for (i = 0; i < 8; i += 2) {
    const __m128i src0 = _mm_unpacklo_epi8(
        _mm_loadl_epi64((const __m128i *)(src + i * src_stride)), zero);
    const __m128i ref0 = _mm_unpacklo_epi8(
        _mm_loadl_epi64((const __m128i *)(ref + i * ref_stride)), zero);
    const __m128i diff0 = _mm_sub_epi16(src0, ref0);

    const __m128i src1 = _mm_unpacklo_epi8(
        _mm_loadl_epi64((const __m128i *)(src + (i + 1) * src_stride)), zero);
    const __m128i ref1 = _mm_unpacklo_epi8(
        _mm_loadl_epi64((const __m128i *)(ref + (i + 1) * ref_stride)), zero);
    const __m128i diff1 = _mm_sub_epi16(src1, ref1);

    vsum = _mm_add_epi16(vsum, diff0);
    vsum = _mm_add_epi16(vsum, diff1);
    vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0));
    vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1));
  }

  // sum
  vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
  vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4));
  vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2));
  *sum = (int16_t)_mm_extract_epi16(vsum, 0);

  // sse
  vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8));
  vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4));
  *sse = _mm_cvtsi128_si32(vsse);
}

void vpx_get16x16var_sse2(const uint8_t *src, int src_stride,
                          const uint8_t *ref, int ref_stride, unsigned int *sse,
                          int *sum) {
  const __m128i zero = _mm_setzero_si128();
  __m128i vsum = _mm_setzero_si128();
  __m128i vsse = _mm_setzero_si128();
  int i;

  for (i = 0; i < 16; ++i) {
    const __m128i s = _mm_loadu_si128((const __m128i *)src);
    const __m128i r = _mm_loadu_si128((const __m128i *)ref);

    const __m128i src0 = _mm_unpacklo_epi8(s, zero);
    const __m128i ref0 = _mm_unpacklo_epi8(r, zero);
    const __m128i diff0 = _mm_sub_epi16(src0, ref0);

    const __m128i src1 = _mm_unpackhi_epi8(s, zero);
    const __m128i ref1 = _mm_unpackhi_epi8(r, zero);
    const __m128i diff1 = _mm_sub_epi16(src1, ref1);

    vsum = _mm_add_epi16(vsum, diff0);
    vsum = _mm_add_epi16(vsum, diff1);
    vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0));
    vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1));

    src += src_stride;
    ref += ref_stride;
  }

  // sum
  vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
  vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4));
  *sum =
      (int16_t)_mm_extract_epi16(vsum, 0) + (int16_t)_mm_extract_epi16(vsum, 1);

  // sse
  vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8));
  vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4));
  *sse = _mm_cvtsi128_si32(vsse);
}
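
/* Note that the sum reduction above stops one 16-bit fold early and adds two
 * lanes in 32-bit arithmetic instead: a full 16x16 block can sum to
 * +/-65280, which would wrap a single int16_t lane, while each of the two
 * partial sums is bounded by 128 * 255 = 32640 and still fits. */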

static void variance_sse2(const unsigned char *src, int src_stride,
                          const unsigned char *ref, int ref_stride, int w,
                          int h, unsigned int *sse, int *sum,
                          getNxMvar_fn_t var_fn, int block_size) {
  int i, j;

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i += block_size) {
    for (j = 0; j < w; j += block_size) {
      unsigned int sse0;
      int sum0;
      var_fn(src + src_stride * i + j, src_stride, ref + ref_stride * i + j,
             ref_stride, &sse0, &sum0);
      *sse += sse0;
      *sum += sum0;
    }
  }
}
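
/* Accumulating the 32-bit per-block SSEs is safe for every size handled in
 * this file: even a 64x64 block tops out at 64 * 64 * 255 * 255 =
 * 266,342,400, well inside unsigned int range. */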

unsigned int vpx_variance4x4_sse2(const unsigned char *src, int src_stride,
                                  const unsigned char *ref, int ref_stride,
                                  unsigned int *sse) {
  int sum;
  get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
  return *sse - ((sum * sum) >> 4);
}
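
/* Every wrapper in this file returns the same quantity: for an N = w * h
 * pixel block,
 *   variance = SSE - (sum * sum) / N,
 * with the division done as a shift by log2(N) (4 for this 4x4 block). A
 * generic sketch of that final step, for illustration only and not a libvpx
 * function: */
static unsigned int variance_final(unsigned int sse, int sum, int shift) {
  /* Widen before squaring so large sums cannot overflow a 32-bit int. */
  return sse - (unsigned int)(((int64_t)sum * sum) >> shift);
}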

unsigned int vpx_variance8x4_sse2(const uint8_t *src, int src_stride,
                                  const uint8_t *ref, int ref_stride,
                                  unsigned int *sse) {
  int sum;
  variance_sse2(src, src_stride, ref, ref_stride, 8, 4, sse, &sum,
                get4x4var_sse2, 4);
  return *sse - ((sum * sum) >> 5);
}

unsigned int vpx_variance4x8_sse2(const uint8_t *src, int src_stride,
                                  const uint8_t *ref, int ref_stride,
                                  unsigned int *sse) {
  int sum;
  variance_sse2(src, src_stride, ref, ref_stride, 4, 8, sse, &sum,
                get4x4var_sse2, 4);
  return *sse - ((sum * sum) >> 5);
}

unsigned int vpx_variance8x8_sse2(const unsigned char *src, int src_stride,
                                  const unsigned char *ref, int ref_stride,
                                  unsigned int *sse) {
  int sum;
  vpx_get8x8var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
  return *sse - ((sum * sum) >> 6);
}

unsigned int vpx_variance16x8_sse2(const unsigned char *src, int src_stride,
                                   const unsigned char *ref, int ref_stride,
                                   unsigned int *sse) {
  int sum;
  variance_sse2(src, src_stride, ref, ref_stride, 16, 8, sse, &sum,
                vpx_get8x8var_sse2, 8);
  return *sse - ((sum * sum) >> 7);
}

unsigned int vpx_variance8x16_sse2(const unsigned char *src, int src_stride,
                                   const unsigned char *ref, int ref_stride,
                                   unsigned int *sse) {
  int sum;
  variance_sse2(src, src_stride, ref, ref_stride, 8, 16, sse, &sum,
                vpx_get8x8var_sse2, 8);
  return *sse - ((sum * sum) >> 7);
}

unsigned int vpx_variance16x16_sse2(const unsigned char *src, int src_stride,
                                    const unsigned char *ref, int ref_stride,
                                    unsigned int *sse) {
  int sum;
  vpx_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
  return *sse - (((uint32_t)((int64_t)sum * sum)) >> 8);
}
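
/* The (int64_t) cast in the 16x16 return matters: sum can reach +/-65280
 * here, and 65280 * 65280 overflows 32-bit signed arithmetic, so the product
 * is formed in 64 bits before the shift by 8 (= log2(256)). */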

unsigned int vpx_variance32x32_sse2(const uint8_t *src, int src_stride,
                                    const uint8_t *ref, int ref_stride,
                                    unsigned int *sse) {
  int sum;
  variance_sse2(src, src_stride, ref, ref_stride, 32, 32, sse, &sum,
                vpx_get16x16var_sse2, 16);
  return *sse - (unsigned int)(((int64_t)sum * sum) >> 10);
}

unsigned int vpx_variance32x16_sse2(const uint8_t *src, int src_stride,
                                    const uint8_t *ref, int ref_stride,
                                    unsigned int *sse) {
  int sum;
  variance_sse2(src, src_stride, ref, ref_stride, 32, 16, sse, &sum,
                vpx_get16x16var_sse2, 16);
  return *sse - (unsigned int)(((int64_t)sum * sum) >> 9);
}

unsigned int vpx_variance16x32_sse2(const uint8_t *src, int src_stride,
                                    const uint8_t *ref, int ref_stride,
                                    unsigned int *sse) {
  int sum;
  variance_sse2(src, src_stride, ref, ref_stride, 16, 32, sse, &sum,
                vpx_get16x16var_sse2, 16);
  return *sse - (unsigned int)(((int64_t)sum * sum) >> 9);
}

unsigned int vpx_variance64x64_sse2(const uint8_t *src, int src_stride,
                                    const uint8_t *ref, int ref_stride,
                                    unsigned int *sse) {
  int sum;
  variance_sse2(src, src_stride, ref, ref_stride, 64, 64, sse, &sum,
                vpx_get16x16var_sse2, 16);
  return *sse - (unsigned int)(((int64_t)sum * sum) >> 12);
}

unsigned int vpx_variance64x32_sse2(const uint8_t *src, int src_stride,
                                    const uint8_t *ref, int ref_stride,
                                    unsigned int *sse) {
  int sum;
  variance_sse2(src, src_stride, ref, ref_stride, 64, 32, sse, &sum,
                vpx_get16x16var_sse2, 16);
  return *sse - (unsigned int)(((int64_t)sum * sum) >> 11);
}

unsigned int vpx_variance32x64_sse2(const uint8_t *src, int src_stride,
                                    const uint8_t *ref, int ref_stride,
                                    unsigned int *sse) {
  int sum;
  variance_sse2(src, src_stride, ref, ref_stride, 32, 64, sse, &sum,
                vpx_get16x16var_sse2, 16);
  return *sse - (unsigned int)(((int64_t)sum * sum) >> 11);
}

unsigned int vpx_mse8x8_sse2(const uint8_t *src, int src_stride,
                             const uint8_t *ref, int ref_stride,
                             unsigned int *sse) {
  vpx_variance8x8_sse2(src, src_stride, ref, ref_stride, sse);
  return *sse;
}

unsigned int vpx_mse8x16_sse2(const uint8_t *src, int src_stride,
                              const uint8_t *ref, int ref_stride,
                              unsigned int *sse) {
  vpx_variance8x16_sse2(src, src_stride, ref, ref_stride, sse);
  return *sse;
}

unsigned int vpx_mse16x8_sse2(const uint8_t *src, int src_stride,
                              const uint8_t *ref, int ref_stride,
                              unsigned int *sse) {
  vpx_variance16x8_sse2(src, src_stride, ref, ref_stride, sse);
  return *sse;
}

unsigned int vpx_mse16x16_sse2(const uint8_t *src, int src_stride,
                               const uint8_t *ref, int ref_stride,
                               unsigned int *sse) {
  vpx_variance16x16_sse2(src, src_stride, ref, ref_stride, sse);
  return *sse;
}
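
/* The mse wrappers reuse the variance kernels but return the raw SSE
 * (written through *sse) and discard the mean correction; MSE as returned
 * here is the unnormalized sum of squared errors. */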

// The 2 unused parameters are placeholders for the PIC-enabled build.
// These declarations are for functions defined in subpel_variance.asm.
#define DECL(w, opt)                                                           \
  int vpx_sub_pixel_variance##w##xh_##opt(                                     \
      const uint8_t *src, ptrdiff_t src_stride, int x_offset, int y_offset,    \
      const uint8_t *dst, ptrdiff_t dst_stride, int height, unsigned int *sse, \
      void *unused0, void *unused)
#define DECLS(opt1, opt2) \
  DECL(4, opt1);          \
  DECL(8, opt1);          \
  DECL(16, opt1)

DECLS(sse2, sse2);
DECLS(ssse3, ssse3);
#undef DECLS
#undef DECL

#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast)                       \
  unsigned int vpx_sub_pixel_variance##w##x##h##_##opt(                        \
      const uint8_t *src, int src_stride, int x_offset, int y_offset,          \
      const uint8_t *dst, int dst_stride, unsigned int *sse_ptr) {             \
    unsigned int sse;                                                          \
    int se = vpx_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset,   \
                                                  y_offset, dst, dst_stride,   \
                                                  h, &sse, NULL, NULL);        \
    if (w > wf) {                                                              \
      unsigned int sse2;                                                       \
      int se2 = vpx_sub_pixel_variance##wf##xh_##opt(                          \
          src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, h,   \
          &sse2, NULL, NULL);                                                  \
      se += se2;                                                               \
      sse += sse2;                                                             \
      if (w > wf * 2) {                                                        \
        se2 = vpx_sub_pixel_variance##wf##xh_##opt(                            \
            src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, h, \
            &sse2, NULL, NULL);                                                \
        se += se2;                                                             \
        sse += sse2;                                                           \
        se2 = vpx_sub_pixel_variance##wf##xh_##opt(                            \
            src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, h, \
            &sse2, NULL, NULL);                                                \
        se += se2;                                                             \
        sse += sse2;                                                           \
      }                                                                        \
    }                                                                          \
    *sse_ptr = sse;                                                            \
    return sse - (unsigned int)(cast_prod(cast se * se) >> (wlog2 + hlog2));   \
  }
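
/* Each FN() instance processes the block as one, two, or four wf-wide
 * columns. For example, FN(64, 64, 16, 6, 6, sse2, (int64_t), (int64_t))
 * defines vpx_sub_pixel_variance64x64_sse2(), which runs
 * vpx_sub_pixel_variance16xh_sse2() at column offsets 0, 16, 32 and 48,
 * accumulates the partial se/sse pairs, and applies the variance identity
 * with a shift of wlog2 + hlog2 = 12. */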

#define FNS(opt1, opt2)                              \
  FN(64, 64, 16, 6, 6, opt1, (int64_t), (int64_t));  \
  FN(64, 32, 16, 6, 5, opt1, (int64_t), (int64_t));  \
  FN(32, 64, 16, 5, 6, opt1, (int64_t), (int64_t));  \
  FN(32, 32, 16, 5, 5, opt1, (int64_t), (int64_t));  \
  FN(32, 16, 16, 5, 4, opt1, (int64_t), (int64_t));  \
  FN(16, 32, 16, 4, 5, opt1, (int64_t), (int64_t));  \
  FN(16, 16, 16, 4, 4, opt1, (uint32_t), (int64_t)); \
  FN(16, 8, 16, 4, 3, opt1, (int32_t), (int32_t));   \
  FN(8, 16, 8, 3, 4, opt1, (int32_t), (int32_t));    \
  FN(8, 8, 8, 3, 3, opt1, (int32_t), (int32_t));     \
  FN(8, 4, 8, 3, 2, opt1, (int32_t), (int32_t));     \
  FN(4, 8, 4, 2, 3, opt1, (int32_t), (int32_t));     \
  FN(4, 4, 4, 2, 2, opt1, (int32_t), (int32_t))

FNS(sse2, sse2);
FNS(ssse3, ssse3);

#undef FNS
#undef FN

// The 2 unused parameters are placeholders for the PIC-enabled build.
#define DECL(w, opt)                                                        \
  int vpx_sub_pixel_avg_variance##w##xh_##opt(                              \
      const uint8_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \
      const uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *sec,         \
      ptrdiff_t sec_stride, int height, unsigned int *sse, void *unused0,   \
      void *unused)
#define DECLS(opt1, opt2) \
  DECL(4, opt1);          \
  DECL(8, opt1);          \
  DECL(16, opt1)

DECLS(sse2, sse2);
DECLS(ssse3, ssse3);
#undef DECL
#undef DECLS

#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast)                       \
  unsigned int vpx_sub_pixel_avg_variance##w##x##h##_##opt(                    \
      const uint8_t *src, int src_stride, int x_offset, int y_offset,          \
      const uint8_t *dst, int dst_stride, unsigned int *sseptr,                \
      const uint8_t *sec) {                                                    \
    unsigned int sse;                                                          \
    int se = vpx_sub_pixel_avg_variance##wf##xh_##opt(                         \
        src, src_stride, x_offset, y_offset, dst, dst_stride, sec, w, h, &sse, \
        NULL, NULL);                                                           \
    if (w > wf) {                                                              \
      unsigned int sse2;                                                       \
      int se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt(                      \
          src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride,      \
          sec + 16, w, h, &sse2, NULL, NULL);                                  \
      se += se2;                                                               \
      sse += sse2;                                                             \
      if (w > wf * 2) {                                                        \
        se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt(                        \
            src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride,    \
            sec + 32, w, h, &sse2, NULL, NULL);                                \
        se += se2;                                                             \
        sse += sse2;                                                           \
        se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt(                        \
            src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride,    \
            sec + 48, w, h, &sse2, NULL, NULL);                                \
        se += se2;                                                             \
        sse += sse2;                                                           \
      }                                                                        \
    }                                                                          \
    *sseptr = sse;                                                             \
    return sse - (unsigned int)(cast_prod(cast se * se) >> (wlog2 + hlog2));   \
  }
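
/* In the avg variants, sec points to a w-wide second prediction that is
 * averaged with the filtered source, which is why the block width w is
 * passed in the sec_stride position and each 16-wide column advances sec by
 * 16. */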

#define FNS(opt1, opt2)                              \
  FN(64, 64, 16, 6, 6, opt1, (int64_t), (int64_t));  \
  FN(64, 32, 16, 6, 5, opt1, (int64_t), (int64_t));  \
  FN(32, 64, 16, 5, 6, opt1, (int64_t), (int64_t));  \
  FN(32, 32, 16, 5, 5, opt1, (int64_t), (int64_t));  \
  FN(32, 16, 16, 5, 4, opt1, (int64_t), (int64_t));  \
  FN(16, 32, 16, 4, 5, opt1, (int64_t), (int64_t));  \
  FN(16, 16, 16, 4, 4, opt1, (uint32_t), (int64_t)); \
  FN(16, 8, 16, 4, 3, opt1, (uint32_t), (int32_t));  \
  FN(8, 16, 8, 3, 4, opt1, (uint32_t), (int32_t));   \
  FN(8, 8, 8, 3, 3, opt1, (uint32_t), (int32_t));    \
  FN(8, 4, 8, 3, 2, opt1, (uint32_t), (int32_t));    \
  FN(4, 8, 4, 2, 3, opt1, (uint32_t), (int32_t));    \
  FN(4, 4, 4, 2, 2, opt1, (uint32_t), (int32_t))

FNS(sse2, sse);
FNS(ssse3, ssse3);

#undef FNS
#undef FN
