LCOV - output.info - media/libvpx/libvpx/vp8/encoder/x86/quantize

LCOV - code coverage report

Current view:	top level - media/libvpx/libvpx/vp8/encoder/x86 - quantize_ssse3.c (source / functions)		Hit	Total	Coverage
Test:	output.info	Lines:	0	45	0.0 %
Date:	2017-07-14 16:53:18	Functions:	0	2	0.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*
       2             :  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
       3             :  *
       4             :  *  Use of this source code is governed by a BSD-style license
       5             :  *  that can be found in the LICENSE file in the root of the source
       6             :  *  tree. An additional intellectual property rights grant can be found
       7             :  *  in the file PATENTS.  All contributing project authors may
       8             :  *  be found in the AUTHORS file in the root of the source tree.
       9             :  */
      10             : 
      11             : #include <tmmintrin.h> /* SSSE3 */
      12             : 
      13             : #include "vp8/encoder/block.h"
      14             : 
      15             : /* bitscan reverse (bsr) */
      16             : #if defined(_MSC_VER)
      17             : #include <intrin.h>
      18             : #pragma intrinsic(_BitScanReverse)
      19             : static int bsr(int mask) {
      20             :   unsigned long eob;
      21             :   _BitScanReverse(&eob, mask);
      22             :   eob++;
      23             :   if (mask == 0) eob = 0;
      24             :   return eob;
      25             : }
      26             : #else
      27           0 : static int bsr(int mask) {
      28             :   int eob;
      29             : #if defined(__GNUC__) && __GNUC__
      30           0 :   __asm__ __volatile__("bsr %1, %0" : "=r"(eob) : "r"(mask) : "flags");
      31             : #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
      32             :   asm volatile("bsr %1, %0" : "=r"(eob) : "r"(mask) : "flags");
      33             : #endif
      34           0 :   eob++;
      35           0 :   if (mask == 0) eob = 0;
      36           0 :   return eob;
      37             : }
      38             : #endif
      39             : 
      40           0 : void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d) {
      41             :   int eob, mask;
      42             : 
      43           0 :   __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
      44           0 :   __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8));
      45           0 :   __m128i round0 = _mm_load_si128((__m128i *)(b->round));
      46           0 :   __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
      47           0 :   __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast));
      48           0 :   __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8));
      49           0 :   __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
      50           0 :   __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
      51             : 
      52             :   __m128i sz0, sz1, x, x0, x1, y0, y1, zeros, abs0, abs1;
      53             : 
      54           0 :   DECLARE_ALIGNED(16, const uint8_t, pshufb_zig_zag_mask[16]) = {
      55             :     0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
      56             :   };
      57           0 :   __m128i zig_zag = _mm_load_si128((const __m128i *)pshufb_zig_zag_mask);
      58             : 
      59             :   /* sign of z: z >> 15 */
      60           0 :   sz0 = _mm_srai_epi16(z0, 15);
      61           0 :   sz1 = _mm_srai_epi16(z1, 15);
      62             : 
      63             :   /* x = abs(z) */
      64           0 :   x0 = _mm_abs_epi16(z0);
      65           0 :   x1 = _mm_abs_epi16(z1);
      66             : 
      67             :   /* x += round */
      68           0 :   x0 = _mm_add_epi16(x0, round0);
      69           0 :   x1 = _mm_add_epi16(x1, round1);
      70             : 
      71             :   /* y = (x * quant) >> 16 */
      72           0 :   y0 = _mm_mulhi_epi16(x0, quant_fast0);
      73           0 :   y1 = _mm_mulhi_epi16(x1, quant_fast1);
      74             : 
      75             :   /* ASM saves Y for EOB */
      76             :   /* I think we can ignore that because adding the sign doesn't change anything
      77             :    * and multiplying 0 by dequant is OK as well */
      78           0 :   abs0 = y0;
      79           0 :   abs1 = y1;
      80             : 
      81             :   /* Restore the sign bit. */
      82           0 :   y0 = _mm_xor_si128(y0, sz0);
      83           0 :   y1 = _mm_xor_si128(y1, sz1);
      84           0 :   x0 = _mm_sub_epi16(y0, sz0);
      85           0 :   x1 = _mm_sub_epi16(y1, sz1);
      86             : 
      87             :   /* qcoeff = x */
      88           0 :   _mm_store_si128((__m128i *)(d->qcoeff), x0);
      89           0 :   _mm_store_si128((__m128i *)(d->qcoeff + 8), x1);
      90             : 
      91             :   /* x * dequant */
      92           0 :   x0 = _mm_mullo_epi16(x0, dequant0);
      93           0 :   x1 = _mm_mullo_epi16(x1, dequant1);
      94             : 
      95             :   /* dqcoeff = x * dequant */
      96           0 :   _mm_store_si128((__m128i *)(d->dqcoeff), x0);
      97           0 :   _mm_store_si128((__m128i *)(d->dqcoeff + 8), x1);
      98             : 
      99           0 :   zeros = _mm_setzero_si128();
     100             : 
     101           0 :   x0 = _mm_cmpgt_epi16(abs0, zeros);
     102           0 :   x1 = _mm_cmpgt_epi16(abs1, zeros);
     103             : 
     104           0 :   x = _mm_packs_epi16(x0, x1);
     105             : 
     106           0 :   x = _mm_shuffle_epi8(x, zig_zag);
     107             : 
     108           0 :   mask = _mm_movemask_epi8(x);
     109             : 
     110           0 :   eob = bsr(mask);
     111             : 
     112           0 :   *d->eob = 0xFF & eob;
     113           0 : }

Generated by: LCOV version 1.13