LCOV - code coverage report
Current view: top level - media/libvpx/libvpx/vp9/common/x86 - vp9_idct_intrin_sse2.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 114 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 3 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
       3             :  *
       4             :  *  Use of this source code is governed by a BSD-style license
       5             :  *  that can be found in the LICENSE file in the root of the source
       6             :  *  tree. An additional intellectual property rights grant can be found
       7             :  *  in the file PATENTS.  All contributing project authors may
       8             :  *  be found in the AUTHORS file in the root of the source tree.
       9             :  */
      10             : 
      11             : #include "./vp9_rtcd.h"
      12             : #include "vpx_dsp/x86/inv_txfm_sse2.h"
      13             : #include "vpx_dsp/x86/txfm_common_sse2.h"
      14             : #include "vpx_ports/mem.h"
      15             : /* Inverse 4x4 hybrid transform with reconstruction (SSE2).
                     :  *
                     :  * Loads two __m128i vectors of coefficients from `input`, runs two 1-D
                     :  * inverse-transform passes chosen by `tx_type`, rounds the result with
                     :  * (x + 8) >> 4 per 16-bit lane, adds it to the 4x4 block of pixels at
                     :  * `dest` and stores the sum back with unsigned 8-bit saturation.
                     :  *
                     :  * input   - coefficient buffer; read at `input` and `input + 8`.
                     :  * dest    - pixel block, read and overwritten in place; 4 bytes are
                     :  *           accessed on each of the rows dest + k * stride, k = 0..3.
                     :  * stride  - distance in bytes between consecutive rows of `dest`.
                     :  * tx_type - 0..3 selects the DCT/ADST combination (see the switch); any
                     :  *           other value trips assert(0).
                     :  */
      16           0 : void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
      17             :                             int tx_type) {
      18             :   __m128i in[2];  // the block being transformed; treated as 16-bit lanes below
      19           0 :   const __m128i zero = _mm_setzero_si128();  // used to widen bytes to 16 bits
      20           0 :   const __m128i eight = _mm_set1_epi16(8);  // rounding bias for the final >> 4
      21             : 
      22           0 :   in[0] = load_input_data(input);
      23           0 :   in[1] = load_input_data(input + 8);
      24             : 
      25           0 :   switch (tx_type) {  // each case runs two 1-D passes, in the order written
      26             :     case 0:  // DCT_DCT
      27           0 :       idct4_sse2(in);
      28           0 :       idct4_sse2(in);
      29           0 :       break;
      30             :     case 1:  // ADST_DCT
      31           0 :       idct4_sse2(in);
      32           0 :       iadst4_sse2(in);
      33           0 :       break;
      34             :     case 2:  // DCT_ADST
      35           0 :       iadst4_sse2(in);
      36           0 :       idct4_sse2(in);
      37           0 :       break;
      38             :     case 3:  // ADST_ADST
      39           0 :       iadst4_sse2(in);
      40           0 :       iadst4_sse2(in);
      41           0 :       break;
      42           0 :     default: assert(0); break;  // unsupported tx_type
      43             :   }
      44             : 
      45             :   // Final round and shift: (x + 8) >> 4 on every 16-bit lane
      46           0 :   in[0] = _mm_add_epi16(in[0], eight);
      47           0 :   in[1] = _mm_add_epi16(in[1], eight);
      48             : 
      49           0 :   in[0] = _mm_srai_epi16(in[0], 4);
      50           0 :   in[1] = _mm_srai_epi16(in[1], 4);
      51             : 
      52             :   /* Reconstruction and store.
                     :    * NOTE(review): the rows of `dest` are read and written through
                     :    * `int *` casts of a `uint8_t *`. This assumes a 4-byte `int` and
                     :    * relies on the compiler tolerating type-punned, possibly unaligned
                     :    * accesses - consider a memcpy-based 32-bit load/store helper. */
      53             :   {
      54           0 :     __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest));  // row 0
      55           0 :     __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2));  // row 2
      56           0 :     d0 = _mm_unpacklo_epi32(d0,
      57           0 :                             _mm_cvtsi32_si128(*(const int *)(dest + stride)));  // low 8 bytes: row 0, row 1
      58           0 :     d2 = _mm_unpacklo_epi32(
      59           0 :         d2, _mm_cvtsi32_si128(*(const int *)(dest + stride * 3)));  // low 8 bytes: row 2, row 3
      60           0 :     d0 = _mm_unpacklo_epi8(d0, zero);  // widen the 8 pixel bytes to 16-bit lanes
      61           0 :     d2 = _mm_unpacklo_epi8(d2, zero);
      62           0 :     d0 = _mm_add_epi16(d0, in[0]);  // pixels + residual
      63           0 :     d2 = _mm_add_epi16(d2, in[1]);
      64           0 :     d0 = _mm_packus_epi16(d0, d2);  // saturate to bytes: rows 0..3, 4 bytes each
      65             :     // store result[0]
      66           0 :     *(int *)dest = _mm_cvtsi128_si32(d0);
      67             :     // store result[1]
      68           0 :     d0 = _mm_srli_si128(d0, 4);  // shift the next 4-byte row into the low dword
      69           0 :     *(int *)(dest + stride) = _mm_cvtsi128_si32(d0);
      70             :     // store result[2]
      71           0 :     d0 = _mm_srli_si128(d0, 4);
      72           0 :     *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0);
      73             :     // store result[3]
      74           0 :     d0 = _mm_srli_si128(d0, 4);
      75           0 :     *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0);
      76             :   }
      77           0 : }
      78             : /* Inverse 8x8 hybrid transform with reconstruction (SSE2).
                     :  *
                     :  * Loads eight __m128i vectors from `input` (offsets 0, 8, ..., 56), runs
                     :  * two 1-D inverse-transform passes chosen by `tx_type`, rounds each
                     :  * 16-bit lane with a saturating add of 16 followed by an arithmetic
                     :  * shift right by 5, then passes each vector to RECON_AND_STORE together
                     :  * with the matching row address of `dest`.
                     :  *
                     :  * input   - coefficient buffer; read at input + 8 * k, k = 0..7.
                     :  * dest    - destination block; row k is addressed as dest + k * stride.
                     :  * stride  - distance in bytes between consecutive rows of `dest`.
                     :  * tx_type - 0..3 selects the DCT/ADST combination (see the switch); any
                     :  *           other value trips assert(0).
                     :  */
      79           0 : void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
      80             :                             int tx_type) {
      81             :   __m128i in[8];
      82           0 :   const __m128i zero = _mm_setzero_si128();  // not named below; presumably used inside RECON_AND_STORE - TODO confirm
      83           0 :   const __m128i final_rounding = _mm_set1_epi16(1 << 4);  // 16, half of the 1 << 5 divisor
      84             : 
      85             :   // load input data
      86           0 :   in[0] = load_input_data(input);
      87           0 :   in[1] = load_input_data(input + 8 * 1);
      88           0 :   in[2] = load_input_data(input + 8 * 2);
      89           0 :   in[3] = load_input_data(input + 8 * 3);
      90           0 :   in[4] = load_input_data(input + 8 * 4);
      91           0 :   in[5] = load_input_data(input + 8 * 5);
      92           0 :   in[6] = load_input_data(input + 8 * 6);
      93           0 :   in[7] = load_input_data(input + 8 * 7);
      94             : 
      95           0 :   switch (tx_type) {  // each case runs two 1-D passes, in the order written
      96             :     case 0:  // DCT_DCT
      97           0 :       idct8_sse2(in);
      98           0 :       idct8_sse2(in);
      99           0 :       break;
     100             :     case 1:  // ADST_DCT
     101           0 :       idct8_sse2(in);
     102           0 :       iadst8_sse2(in);
     103           0 :       break;
     104             :     case 2:  // DCT_ADST
     105           0 :       iadst8_sse2(in);
     106           0 :       idct8_sse2(in);
     107           0 :       break;
     108             :     case 3:  // ADST_ADST
     109           0 :       iadst8_sse2(in);
     110           0 :       iadst8_sse2(in);
     111           0 :       break;
     112           0 :     default: assert(0); break;  // unsupported tx_type
     113             :   }
     114             : 
     115             :   // Final rounding and shift: saturating (x + 16), then >> 5, per 16-bit lane.
                     :   // NOTE(review): the add here saturates (_mm_adds_epi16) whereas
                     :   // vp9_iht4x4_16_add_sse2 uses the wrapping _mm_add_epi16 - confirm intended.
     116           0 :   in[0] = _mm_adds_epi16(in[0], final_rounding);
     117           0 :   in[1] = _mm_adds_epi16(in[1], final_rounding);
     118           0 :   in[2] = _mm_adds_epi16(in[2], final_rounding);
     119           0 :   in[3] = _mm_adds_epi16(in[3], final_rounding);
     120           0 :   in[4] = _mm_adds_epi16(in[4], final_rounding);
     121           0 :   in[5] = _mm_adds_epi16(in[5], final_rounding);
     122           0 :   in[6] = _mm_adds_epi16(in[6], final_rounding);
     123           0 :   in[7] = _mm_adds_epi16(in[7], final_rounding);
     124             : 
     125           0 :   in[0] = _mm_srai_epi16(in[0], 5);
     126           0 :   in[1] = _mm_srai_epi16(in[1], 5);
     127           0 :   in[2] = _mm_srai_epi16(in[2], 5);
     128           0 :   in[3] = _mm_srai_epi16(in[3], 5);
     129           0 :   in[4] = _mm_srai_epi16(in[4], 5);
     130           0 :   in[5] = _mm_srai_epi16(in[5], 5);
     131           0 :   in[6] = _mm_srai_epi16(in[6], 5);
     132           0 :   in[7] = _mm_srai_epi16(in[7], 5);
     133             : // RECON_AND_STORE is defined outside this file; presumably it adds a vector of residuals to one row of dest and stores the result - TODO confirm
     134           0 :   RECON_AND_STORE(dest + 0 * stride, in[0]);
     135           0 :   RECON_AND_STORE(dest + 1 * stride, in[1]);
     136           0 :   RECON_AND_STORE(dest + 2 * stride, in[2]);
     137           0 :   RECON_AND_STORE(dest + 3 * stride, in[3]);
     138           0 :   RECON_AND_STORE(dest + 4 * stride, in[4]);
     139           0 :   RECON_AND_STORE(dest + 5 * stride, in[5]);
     140           0 :   RECON_AND_STORE(dest + 6 * stride, in[6]);
     141           0 :   RECON_AND_STORE(dest + 7 * stride, in[7]);
     142           0 : }
     143             : /* Inverse 16x16 hybrid transform with reconstruction (SSE2).
                     :  *
                     :  * Fills two 16-vector buffers with load_buffer_8x16 (from `input` and
                     :  * from `input + 8`), runs two 1-D inverse-transform passes chosen by
                     :  * `tx_type` over both buffers, then writes them out with
                     :  * write_buffer_8x16 at `dest` and at `dest + 8`.
                     :  *
                     :  * No rounding or pixel addition is visible in this function; presumably
                     :  * write_buffer_8x16 performs the final rounding and the add to `dest` -
                     :  * TODO confirm against its definition.
                     :  *
                     :  * input   - coefficient buffer; the local pointer is advanced by 8.
                     :  * dest    - destination block; the local pointer is advanced by 8.
                     :  * stride  - forwarded unchanged to write_buffer_8x16.
                     :  * tx_type - 0..3 selects the DCT/ADST combination (see the switch); any
                     :  *           other value trips assert(0).
                     :  */
     144           0 : void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
     145             :                                int stride, int tx_type) {
     146             :   __m128i in0[16], in1[16];  // presumably the left and right 8-column halves - TODO confirm
     147             : 
     148           0 :   load_buffer_8x16(input, in0);
     149           0 :   input += 8;  // step to the second half of the coefficients
     150           0 :   load_buffer_8x16(input, in1);
     151             : 
     152           0 :   switch (tx_type) {  // each case runs two 1-D passes, in the order written
     153             :     case 0:  // DCT_DCT
     154           0 :       idct16_sse2(in0, in1);
     155           0 :       idct16_sse2(in0, in1);
     156           0 :       break;
     157             :     case 1:  // ADST_DCT
     158           0 :       idct16_sse2(in0, in1);
     159           0 :       iadst16_sse2(in0, in1);
     160           0 :       break;
     161             :     case 2:  // DCT_ADST
     162           0 :       iadst16_sse2(in0, in1);
     163           0 :       idct16_sse2(in0, in1);
     164           0 :       break;
     165             :     case 3:  // ADST_ADST
     166           0 :       iadst16_sse2(in0, in1);
     167           0 :       iadst16_sse2(in0, in1);
     168           0 :       break;
     169           0 :     default: assert(0); break;  // unsupported tx_type
     170             :   }
     171             : 
     172           0 :   write_buffer_8x16(dest, in0, stride);
     173           0 :   dest += 8;  // second buffer goes 8 bytes to the right of the first
     174           0 :   write_buffer_8x16(dest, in1, stride);
     175           0 : }

Generated by: LCOV version 1.13