LCOV - code coverage report
Current view: top level - third_party/aom/aom_dsp/x86 - variance_avx2.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 55 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 11 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #include <immintrin.h>
      13             : #include "./aom_dsp_rtcd.h"
      14             : 
      15             : typedef void (*get_var_avx2)(const uint8_t *src, int src_stride,
      16             :                              const uint8_t *ref, int ref_stride,
      17             :                              unsigned int *sse, int *sum);
      18             : 
      19             : void aom_get32x32var_avx2(const uint8_t *src, int src_stride,
      20             :                           const uint8_t *ref, int ref_stride, unsigned int *sse,
      21             :                           int *sum);
      22             : 
      23           0 : static void variance_avx2(const uint8_t *src, int src_stride,
      24             :                           const uint8_t *ref, int ref_stride, int w, int h,
      25             :                           unsigned int *sse, int *sum, get_var_avx2 var_fn,
      26             :                           int block_size) {
      27             :   int i, j;
      28             : 
      29           0 :   *sse = 0;
      30           0 :   *sum = 0;
      31             : 
      32           0 :   for (i = 0; i < h; i += 16) {
      33           0 :     for (j = 0; j < w; j += block_size) {
      34             :       unsigned int sse0;
      35             :       int sum0;
      36           0 :       var_fn(&src[src_stride * i + j], src_stride, &ref[ref_stride * i + j],
      37             :              ref_stride, &sse0, &sum0);
      38           0 :       *sse += sse0;
      39           0 :       *sum += sum0;
      40             :     }
      41             :   }
      42           0 : }
      43             : 
      44           0 : unsigned int aom_variance16x16_avx2(const uint8_t *src, int src_stride,
      45             :                                     const uint8_t *ref, int ref_stride,
      46             :                                     unsigned int *sse) {
      47             :   int sum;
      48             :   unsigned int variance;
      49           0 :   variance_avx2(src, src_stride, ref, ref_stride, 16, 16, sse, &sum,
      50             :                 aom_get16x16var_avx2, 16);
      51             : 
      52           0 :   variance = *sse - (((uint32_t)((int64_t)sum * sum)) >> 8);
      53             :   _mm256_zeroupper();
      54           0 :   return variance;
      55             : }
      56             : 
      /*
       * Squared error of a 16x16 block.
       *
       * Calls aom_get16x16var_avx2 directly; the value it writes to *sse is also
       * the return value. The sum the kernel reports is not used.
       */
      unsigned int aom_mse16x16_avx2(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     unsigned int *sse) {
        /* The kernel's signature requires a sum output; it is ignored here. */
        int discarded_sum;

        aom_get16x16var_avx2(src, src_stride, ref, ref_stride, sse,
                             &discarded_sum);
        _mm256_zeroupper();
        return *sse;
      }
      65             : 
      66           0 : unsigned int aom_variance32x16_avx2(const uint8_t *src, int src_stride,
      67             :                                     const uint8_t *ref, int ref_stride,
      68             :                                     unsigned int *sse) {
      69             :   int sum;
      70             :   unsigned int variance;
      71           0 :   variance_avx2(src, src_stride, ref, ref_stride, 32, 16, sse, &sum,
      72             :                 aom_get32x32var_avx2, 32);
      73             : 
      74           0 :   variance = *sse - (uint32_t)(((int64_t)sum * sum) >> 9);
      75             :   _mm256_zeroupper();
      76           0 :   return variance;
      77             : }
      78             : 
      79           0 : unsigned int aom_variance32x32_avx2(const uint8_t *src, int src_stride,
      80             :                                     const uint8_t *ref, int ref_stride,
      81             :                                     unsigned int *sse) {
      82             :   int sum;
      83             :   unsigned int variance;
      84           0 :   variance_avx2(src, src_stride, ref, ref_stride, 32, 32, sse, &sum,
      85             :                 aom_get32x32var_avx2, 32);
      86             : 
      87           0 :   variance = *sse - (uint32_t)(((int64_t)sum * sum) >> 10);
      88             :   _mm256_zeroupper();
      89           0 :   return variance;
      90             : }
      91             : 
      92           0 : unsigned int aom_variance64x64_avx2(const uint8_t *src, int src_stride,
      93             :                                     const uint8_t *ref, int ref_stride,
      94             :                                     unsigned int *sse) {
      95             :   int sum;
      96             :   unsigned int variance;
      97           0 :   variance_avx2(src, src_stride, ref, ref_stride, 64, 64, sse, &sum,
      98             :                 aom_get32x32var_avx2, 32);
      99             : 
     100           0 :   variance = *sse - (uint32_t)(((int64_t)sum * sum) >> 12);
     101             :   _mm256_zeroupper();
     102           0 :   return variance;
     103             : }
     104             : 
     105           0 : unsigned int aom_variance64x32_avx2(const uint8_t *src, int src_stride,
     106             :                                     const uint8_t *ref, int ref_stride,
     107             :                                     unsigned int *sse) {
     108             :   int sum;
     109             :   unsigned int variance;
     110           0 :   variance_avx2(src, src_stride, ref, ref_stride, 64, 32, sse, &sum,
     111             :                 aom_get32x32var_avx2, 32);
     112             : 
     113           0 :   variance = *sse - (uint32_t)(((int64_t)sum * sum) >> 11);
     114             :   _mm256_zeroupper();
     115           0 :   return variance;
     116             : }
     117             : 
     118             : unsigned int aom_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride,
     119             :                                              int x_offset, int y_offset,
     120             :                                              const uint8_t *dst, int dst_stride,
     121             :                                              int height, unsigned int *sse);
     122             : 
     123             : unsigned int aom_sub_pixel_avg_variance32xh_avx2(
     124             :     const uint8_t *src, int src_stride, int x_offset, int y_offset,
     125             :     const uint8_t *dst, int dst_stride, const uint8_t *sec, int sec_stride,
     126             :     int height, unsigned int *sseptr);
     127             : 
     /*
      * Sub-pixel variance of a 64x64 block.
      *
      * The block is handled as two 32-column halves (column offsets 0 and 32),
      * each passed to aom_sub_pixel_variance32xh_avx2 with a height of 64. The
      * values returned by the two calls are added together, as are the squared
      * errors they report; the combined squared error is stored in *sse and the
      * function returns
      *   *sse - ((se * se) >> 12)
      * where 2^12 = 64 * 64 is the number of samples in the block.
      */
     unsigned int aom_sub_pixel_variance64x64_avx2(const uint8_t *src,
                                                   int src_stride, int x_offset,
                                                   int y_offset, const uint8_t *dst,
                                                   int dst_stride,
                                                   unsigned int *sse) {
       unsigned int half_sse[2];
       int half_se[2];
       int half;
       int se_total;
       unsigned int result;

       /* Left half first, then right half. */
       for (half = 0; half < 2; ++half) {
         const int col = 32 * half;
         half_se[half] = aom_sub_pixel_variance32xh_avx2(
             src + col, src_stride, x_offset, y_offset, dst + col, dst_stride, 64,
             &half_sse[half]);
       }

       se_total = half_se[0] + half_se[1];
       *sse = half_sse[0] + half_sse[1];

       result = *sse - (uint32_t)(((int64_t)se_total * se_total) >> 12);
       _mm256_zeroupper();
       return result;
     }
     148             : 
     /*
      * Sub-pixel variance of a 32x32 block.
      *
      * A single call to aom_sub_pixel_variance32xh_avx2 with a height of 32
      * writes the squared error to *sse and returns the value used as `se`. The
      * function returns
      *   *sse - ((se * se) >> 10)
      * where 2^10 = 32 * 32 is the number of samples in the block.
      */
     unsigned int aom_sub_pixel_variance32x32_avx2(const uint8_t *src,
                                                   int src_stride, int x_offset,
                                                   int y_offset, const uint8_t *dst,
                                                   int dst_stride,
                                                   unsigned int *sse) {
       int64_t se_sq;
       unsigned int result;
       const int se_total = aom_sub_pixel_variance32xh_avx2(
           src, src_stride, x_offset, y_offset, dst, dst_stride, 32, sse);

       /* Square in 64 bits, shift, then narrow to 32 bits. */
       se_sq = (int64_t)se_total * se_total;
       result = *sse - (uint32_t)(se_sq >> 10);
       _mm256_zeroupper();
       return result;
     }
     161             : 
     /*
      * Sub-pixel "avg" variance of a 64x64 block with a second predictor `sec`.
      *
      * The block is handled as two 32-column halves (column offsets 0 and 32 in
      * src, dst and sec), each passed to aom_sub_pixel_avg_variance32xh_avx2
      * with a sec stride of 64 and a height of 64. The values returned by the
      * two calls are added together, as are the squared errors they report; the
      * combined squared error is stored in *sse and the function returns
      *   *sse - ((se * se) >> 12)
      * where 2^12 = 64 * 64 is the number of samples in the block.
      */
     unsigned int aom_sub_pixel_avg_variance64x64_avx2(
         const uint8_t *src, int src_stride, int x_offset, int y_offset,
         const uint8_t *dst, int dst_stride, unsigned int *sse, const uint8_t *sec) {
       unsigned int half_sse[2];
       int half_se[2];
       int half;
       int se_total;
       unsigned int result;

       /* Left half first, then right half. */
       for (half = 0; half < 2; ++half) {
         const int col = 32 * half;
         half_se[half] = aom_sub_pixel_avg_variance32xh_avx2(
             src + col, src_stride, x_offset, y_offset, dst + col, dst_stride,
             sec + col, 64, 64, &half_sse[half]);
       }

       se_total = half_se[0] + half_se[1];
       *sse = half_sse[0] + half_sse[1];

       result = *sse - (uint32_t)(((int64_t)se_total * se_total) >> 12);
       _mm256_zeroupper();
       return result;
     }
     181             : 
     /*
      * Sub-pixel "avg" variance of a 32x32 block with a second predictor `sec`.
      *
      * A single call to aom_sub_pixel_avg_variance32xh_avx2 (sec stride 32,
      * height 32) writes the squared error to *sse and returns the value used as
      * `se`. The function returns
      *   *sse - ((se * se) >> 10)
      * where 2^10 = 32 * 32 is the number of samples in the block.
      */
     unsigned int aom_sub_pixel_avg_variance32x32_avx2(
         const uint8_t *src, int src_stride, int x_offset, int y_offset,
         const uint8_t *dst, int dst_stride, unsigned int *sse, const uint8_t *sec) {
       int64_t se_sq;
       unsigned int result;
       /* The full 32-sample width is covered by one call to the 32xh kernel. */
       const int se_total = aom_sub_pixel_avg_variance32xh_avx2(
           src, src_stride, x_offset, y_offset, dst, dst_stride, sec, 32, 32, sse);

       /* Square in 64 bits, shift, then narrow to 32 bits. */
       se_sq = (int64_t)se_total * se_total;
       result = *sse - (uint32_t)(se_sq >> 10);
       _mm256_zeroupper();
       return result;
     }

Generated by: LCOV version 1.13