LCOV - code coverage report
Current view: top level - third_party/aom/aom_dsp/x86 - highbd_quantize_intrin_sse2.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 80 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 2 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #include <emmintrin.h>
      13             : 
      14             : #include "aom_dsp/aom_dsp_common.h"
      15             : #include "aom_mem/aom_mem.h"
      16             : #include "aom_ports/mem.h"
      17             : 
      18             : #if CONFIG_HIGHBITDEPTH
      19           0 : void aom_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count,
      20             :                                 int skip_block, const int16_t *zbin_ptr,
      21             :                                 const int16_t *round_ptr,
      22             :                                 const int16_t *quant_ptr,
      23             :                                 const int16_t *quant_shift_ptr,
      24             :                                 tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
      25             :                                 const int16_t *dequant_ptr, uint16_t *eob_ptr,
      26             :                                 const int16_t *scan, const int16_t *iscan) {
                         // Quantizes the coefficients in coeff_ptr into qcoeff_ptr, writes the
                         // matching dequantized values to dqcoeff_ptr, and stores in *eob_ptr the
                         // largest iscan[] value of any coefficient that quantized to non-zero,
                         // plus one (0 when there is none).
                         //
                         // - Both output buffers are zeroed over `count` entries first. When
                         //   skip_block is non-zero nothing else is written and *eob_ptr is 0.
                         // - Entry [0] of zbin/round/quant/quant_shift/dequant is applied to
                         //   coefficient 0 and entry [1] to every other coefficient.
                         // - Coefficients are handled in groups of four, so only the first
                         //   (count / 4) * 4 of them are examined.
                         // - The code reads tran_low_t as 32-bit lanes and uses the aligned load
                         //   _mm_load_si128, which requires coeff_ptr to be 16-byte aligned.
                         // - scan is unused.
      27           0 :   int i, j, non_zero_regs = (int)count / 4, eob_i = -1;
      28             :   __m128i zbins[2];
      29             :   __m128i nzbins[2];
      30             : 
                         // _mm_set_epi32 takes its arguments from the highest lane to the lowest,
                         // so lane 0 of zbins[0] holds zbin_ptr[0] and the other lanes hold
                         // zbin_ptr[1]. zbins[1] is used for every group after the first.
      31           0 :   zbins[0] = _mm_set_epi32((int)zbin_ptr[1], (int)zbin_ptr[1], (int)zbin_ptr[1],
      32           0 :                            (int)zbin_ptr[0]);
      33           0 :   zbins[1] = _mm_set1_epi32((int)zbin_ptr[1]);
      34             : 
                         // nzbins = 0 - zbins, i.e. the negated thresholds.
      35           0 :   nzbins[0] = _mm_setzero_si128();
      36           0 :   nzbins[1] = _mm_setzero_si128();
      37           0 :   nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]);
      38           0 :   nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]);
      39             : 
      40             :   (void)scan;
      41             : 
                         // NOTE(review): memset needs <string.h>, which is not included directly
                         // here; presumably it arrives through one of the project headers.
      42           0 :   memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));
      43           0 :   memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));
      44             : 
      45           0 :   if (!skip_block) {
      46             :     // Pre-scan pass
                           // Walk backwards from the last group of four and drop every trailing
                           // group whose coefficients all lie strictly between -zbin and +zbin.
                           // The walk stops at the first group that has a coefficient outside
                           // that range; non_zero_regs is then the number of leading groups the
                           // quantization pass has to visit.
      47           0 :     for (i = ((int)count / 4) - 1; i >= 0; i--) {
      48             :       __m128i coeffs, cmp1, cmp2;
      49             :       int test;
      50           0 :       coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
      51           0 :       cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);
      52           0 :       cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);
      53           0 :       cmp1 = _mm_and_si128(cmp1, cmp2);
                             // One mask bit per byte: 0xffff means all four lanes passed both
                             // comparisons.
      54           0 :       test = _mm_movemask_epi8(cmp1);
      55           0 :       if (test == 0xffff)
      56           0 :         non_zero_regs--;
      57             :       else
      58           0 :         break;
      59             :     }
      60             : 
      61             :     // Quantization pass:
      62           0 :     for (i = 0; i < non_zero_regs; i++) {
      63             :       __m128i coeffs, coeffs_sign, tmp1, tmp2;
      64             :       int test;
      65             :       int abs_coeff[4];
      66             :       int coeff_sign[4];
      67             : 
      68           0 :       coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
                             // Arithmetic shift by 31 yields 0 for non-negative lanes and -1 for
                             // negative ones; (x ^ sign) - sign is then the absolute value.
      69           0 :       coeffs_sign = _mm_srai_epi32(coeffs, 31);
      70           0 :       coeffs = _mm_sub_epi32(_mm_xor_si128(coeffs, coeffs_sign), coeffs_sign);
                             // (abs > zbin) | (abs == zbin): lanes whose magnitude reaches the
                             // threshold.
      71           0 :       tmp1 = _mm_cmpgt_epi32(coeffs, zbins[i != 0]);
      72           0 :       tmp2 = _mm_cmpeq_epi32(coeffs, zbins[i != 0]);
      73           0 :       tmp1 = _mm_or_si128(tmp1, tmp2);
      74           0 :       test = _mm_movemask_epi8(tmp1);
                             // Spill magnitudes and signs so the per-lane arithmetic below can be
                             // done in scalar code.
      75             :       _mm_storeu_si128((__m128i *)abs_coeff, coeffs);
      76             :       _mm_storeu_si128((__m128i *)coeff_sign, coeffs_sign);
      77             : 
      78           0 :       for (j = 0; j < 4; j++) {
                               // Bit 4 * j of the byte mask is the lowest byte of 32-bit lane j.
      79           0 :         if (test & (1 << (4 * j))) {
      80           0 :           int k = 4 * i + j;
                                 // The addition is evaluated in int and only then widened to
                                 // int64_t.
      81           0 :           const int64_t tmp3 = abs_coeff[j] + round_ptr[k != 0];
      82           0 :           const int64_t tmp4 = ((tmp3 * quant_ptr[k != 0]) >> 16) + tmp3;
      83           0 :           const uint32_t abs_qcoeff =
      84           0 :               (uint32_t)((tmp4 * quant_shift_ptr[k != 0]) >> 16);
                                 // Re-apply the sign with the same xor/subtract trick.
      85           0 :           qcoeff_ptr[k] = (int)(abs_qcoeff ^ coeff_sign[j]) - coeff_sign[j];
      86           0 :           dqcoeff_ptr[k] = qcoeff_ptr[k] * dequant_ptr[k != 0];
                                 // Track the largest iscan[] value among non-zero results.
      87           0 :           if (abs_qcoeff) eob_i = iscan[k] > eob_i ? iscan[k] : eob_i;
      88             :         }
      89             :       }
      90             :     }
      91             :   }
      92           0 :   *eob_ptr = eob_i + 1;
      93           0 : }
      94             : 
      95           0 : void aom_highbd_quantize_b_32x32_sse2(
      96             :     const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
      97             :     const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
      98             :     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
      99             :     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     100             :     const int16_t *scan, const int16_t *iscan) {
                         // Quantizes the coefficients in coeff_ptr into qcoeff_ptr, writes the
                         // matching dequantized values to dqcoeff_ptr, and stores in *eob_ptr the
                         // largest iscan[] value of any coefficient that quantized to non-zero,
                         // plus one (0 when there is none).
                         //
                         // - Both output buffers are zeroed over n_coeffs entries first. When
                         //   skip_block is non-zero nothing else is written and *eob_ptr is 0.
                         // - Entry [0] of zbin/round/quant/quant_shift/dequant is applied to
                         //   coefficient 0 and entry [1] to every other coefficient.
                         // - The zbin and round values are passed through
                         //   ROUND_POWER_OF_TWO(x, 1), the last shift is 15 bits, and the
                         //   dequantized value is divided by 2.
                         // - The pre-scan reads tran_low_t as 32-bit lanes and uses the aligned
                         //   load _mm_load_si128, which requires coeff_ptr to be 16-byte aligned.
                         // - scan is unused.
     101             :   __m128i zbins[2];
     102             :   __m128i nzbins[2];
     103           0 :   int idx = 0;
                         // Indices of the coefficients picked by the pre-scan pass.
                         // NOTE(review): writes to idx_arr are not bounds-checked, so this
                         // assumes n_coeffs <= 1024 -- confirm against callers.
     104             :   int idx_arr[1024];
     105           0 :   int i, eob = -1;
                         // ROUND_POWER_OF_TWO is defined outside this file; presumably it halves
                         // the value with rounding -- TODO confirm.
     106           0 :   const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0], 1);
     107           0 :   const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1], 1);
     108             :   (void)scan;
                         // _mm_set_epi32 takes its arguments from the highest lane to the lowest,
                         // so lane 0 of zbins[0] holds zbin0_tmp and the other lanes hold
                         // zbin1_tmp. zbins[1] is used for every group after the first.
     109           0 :   zbins[0] = _mm_set_epi32(zbin1_tmp, zbin1_tmp, zbin1_tmp, zbin0_tmp);
     110           0 :   zbins[1] = _mm_set1_epi32(zbin1_tmp);
     111             : 
                         // nzbins = 0 - zbins, i.e. the negated thresholds.
     112           0 :   nzbins[0] = _mm_setzero_si128();
     113           0 :   nzbins[1] = _mm_setzero_si128();
     114           0 :   nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]);
     115           0 :   nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]);
     116             : 
                         // NOTE(review): memset needs <string.h>, which is not included directly
                         // here; presumably it arrives through one of the project headers.
     117           0 :   memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
     118           0 :   memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
     119             : 
     120           0 :   if (!skip_block) {
     121             :     // Pre-scan pass
                           // For each group of four, build a byte mask that is set for lanes
                           // lying strictly between -zbin and +zbin. A lane whose four mask bits
                           // are all clear is outside that range, so its index is queued in
                           // idx_arr for the quantization pass.
     122           0 :     for (i = 0; i < n_coeffs / 4; i++) {
     123             :       __m128i coeffs, cmp1, cmp2;
     124             :       int test;
     125           0 :       coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
     126           0 :       cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);
     127           0 :       cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);
     128           0 :       cmp1 = _mm_and_si128(cmp1, cmp2);
     129           0 :       test = _mm_movemask_epi8(cmp1);
                             // Each 32-bit lane contributes one nibble to the mask.
     130           0 :       if (!(test & 0xf)) idx_arr[idx++] = i * 4;
     131           0 :       if (!(test & 0xf0)) idx_arr[idx++] = i * 4 + 1;
     132           0 :       if (!(test & 0xf00)) idx_arr[idx++] = i * 4 + 2;
     133           0 :       if (!(test & 0xf000)) idx_arr[idx++] = i * 4 + 3;
     134             :     }
     135             : 
     136             :     // Quantization pass: only process the coefficients selected in
     137             :     // pre-scan pass. Note: idx can be zero.
     138           0 :     for (i = 0; i < idx; i++) {
     139           0 :       const int rc = idx_arr[i];
     140           0 :       const int coeff = coeff_ptr[rc];
                             // coeff >> 31 is used as a sign mask (0 or -1, relying on an
                             // arithmetic right shift); (x ^ sign) - sign is the absolute value.
     141           0 :       const int coeff_sign = (coeff >> 31);
     142           0 :       const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
                             // The addition is evaluated in int and only then widened to int64_t.
     143           0 :       const int64_t tmp1 =
     144           0 :           abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
     145           0 :       const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
     146           0 :       const uint32_t abs_qcoeff =
     147           0 :           (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
                             // Re-apply the sign with the same xor/subtract trick.
     148           0 :       qcoeff_ptr[rc] = (int)(abs_qcoeff ^ coeff_sign) - coeff_sign;
     149           0 :       dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
                             // Track the largest iscan[] value among non-zero results.
     150           0 :       if (abs_qcoeff) eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob;
     151             :     }
     152             :   }
     153           0 :   *eob_ptr = eob + 1;
     154           0 : }
     155             : #endif

Generated by: LCOV version 1.13