LCOV - code coverage report
Current view: top level - third_party/aom/av1/encoder/x86 - highbd_block_error_intrin_sse2.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 33 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 1 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #include <emmintrin.h>
      13             : #include <stdio.h>
      14             : 
      15             : #include "av1/common/common.h"
      16             : 
      17           0 : int64_t av1_highbd_block_error_sse2(tran_low_t *coeff, tran_low_t *dqcoeff,
      18             :                                     intptr_t block_size, int64_t *ssz,
      19             :                                     int bps) {
      20             :   int i, j, test;
      21             :   uint32_t temp[4];
      22             :   __m128i max, min, cmp0, cmp1, cmp2, cmp3;
      23           0 :   int64_t error = 0, sqcoeff = 0;
      24           0 :   const int shift = 2 * (bps - 8);
      25           0 :   const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
      26             : 
      27           0 :   for (i = 0; i < block_size; i += 8) {
      28             :     // Load the data into xmm registers
      29           0 :     __m128i mm_coeff = _mm_load_si128((__m128i *)(coeff + i));
      30           0 :     __m128i mm_coeff2 = _mm_load_si128((__m128i *)(coeff + i + 4));
      31           0 :     __m128i mm_dqcoeff = _mm_load_si128((__m128i *)(dqcoeff + i));
      32           0 :     __m128i mm_dqcoeff2 = _mm_load_si128((__m128i *)(dqcoeff + i + 4));
      33             :     // Check if any values require more than 15 bit
      34           0 :     max = _mm_set1_epi32(0x3fff);
      35           0 :     min = _mm_set1_epi32(0xffffc000);
      36           0 :     cmp0 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff, max),
      37             :                          _mm_cmplt_epi32(mm_coeff, min));
      38           0 :     cmp1 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff2, max),
      39             :                          _mm_cmplt_epi32(mm_coeff2, min));
      40           0 :     cmp2 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff, max),
      41             :                          _mm_cmplt_epi32(mm_dqcoeff, min));
      42           0 :     cmp3 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff2, max),
      43             :                          _mm_cmplt_epi32(mm_dqcoeff2, min));
      44           0 :     test = _mm_movemask_epi8(
      45             :         _mm_or_si128(_mm_or_si128(cmp0, cmp1), _mm_or_si128(cmp2, cmp3)));
      46             : 
      47           0 :     if (!test) {
      48             :       __m128i mm_diff, error_sse2, sqcoeff_sse2;
      49           0 :       mm_coeff = _mm_packs_epi32(mm_coeff, mm_coeff2);
      50           0 :       mm_dqcoeff = _mm_packs_epi32(mm_dqcoeff, mm_dqcoeff2);
      51           0 :       mm_diff = _mm_sub_epi16(mm_coeff, mm_dqcoeff);
      52           0 :       error_sse2 = _mm_madd_epi16(mm_diff, mm_diff);
      53           0 :       sqcoeff_sse2 = _mm_madd_epi16(mm_coeff, mm_coeff);
      54             :       _mm_storeu_si128((__m128i *)temp, error_sse2);
      55           0 :       error = error + temp[0] + temp[1] + temp[2] + temp[3];
      56             :       _mm_storeu_si128((__m128i *)temp, sqcoeff_sse2);
      57           0 :       sqcoeff += temp[0] + temp[1] + temp[2] + temp[3];
      58             :     } else {
      59           0 :       for (j = 0; j < 8; j++) {
      60           0 :         const int64_t diff = coeff[i + j] - dqcoeff[i + j];
      61           0 :         error += diff * diff;
      62           0 :         sqcoeff += (int64_t)coeff[i + j] * (int64_t)coeff[i + j];
      63             :       }
      64             :     }
      65             :   }
      66           0 :   assert(error >= 0 && sqcoeff >= 0);
      67           0 :   error = (error + rounding) >> shift;
      68           0 :   sqcoeff = (sqcoeff + rounding) >> shift;
      69             : 
      70           0 :   *ssz = sqcoeff;
      71           0 :   return error;
      72             : }

Generated by: LCOV version 1.13