LCOV - code coverage report
Current view: top level - third_party/aom/aom_dsp/x86 - inv_txfm_common_avx2.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 39 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 5 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2017, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #ifndef AOM_DSP_X86_INV_TXFM_COMMON_AVX2_H
      13             : #define AOM_DSP_X86_INV_TXFM_COMMON_AVX2_H
      14             : 
      15             : #include <immintrin.h>
      16             : 
      17             : #include "aom_dsp/txfm_common.h"
      18             : #include "aom_dsp/x86/txfm_common_avx2.h"
      19             : 
      20           0 : static INLINE void load_coeff(const tran_low_t *coeff, __m256i *in) {
      21             : #if CONFIG_HIGHBITDEPTH
      22           0 :   *in = _mm256_setr_epi16(
      23           0 :       (int16_t)coeff[0], (int16_t)coeff[1], (int16_t)coeff[2],
      24           0 :       (int16_t)coeff[3], (int16_t)coeff[4], (int16_t)coeff[5],
      25           0 :       (int16_t)coeff[6], (int16_t)coeff[7], (int16_t)coeff[8],
      26           0 :       (int16_t)coeff[9], (int16_t)coeff[10], (int16_t)coeff[11],
      27           0 :       (int16_t)coeff[12], (int16_t)coeff[13], (int16_t)coeff[14],
      28           0 :       (int16_t)coeff[15]);
      29             : #else
      30             :   *in = _mm256_loadu_si256((const __m256i *)coeff);
      31             : #endif
      32           0 : }
      33             : 
      34           0 : static INLINE void load_buffer_16x16(const tran_low_t *coeff, __m256i *in) {
      35           0 :   int i = 0;
      36           0 :   while (i < 16) {
      37           0 :     load_coeff(coeff + (i << 4), &in[i]);
      38           0 :     i += 1;
      39             :   }
      40           0 : }
      41             : 
      42           0 : static INLINE void recon_and_store(const __m256i *res, uint8_t *output) {
      43           0 :   const __m128i zero = _mm_setzero_si128();
      44           0 :   __m128i x = _mm_loadu_si128((__m128i const *)output);
      45           0 :   __m128i p0 = _mm_unpacklo_epi8(x, zero);
      46           0 :   __m128i p1 = _mm_unpackhi_epi8(x, zero);
      47             : 
      48           0 :   p0 = _mm_add_epi16(p0, _mm256_castsi256_si128(*res));
      49           0 :   p1 = _mm_add_epi16(p1, _mm256_extractf128_si256(*res, 1));
      50           0 :   x = _mm_packus_epi16(p0, p1);
      51             :   _mm_storeu_si128((__m128i *)output, x);
      52           0 : }
      53             : 
      54             : #define IDCT_ROUNDING_POS (6)
      55           0 : static INLINE void store_buffer_16xN(__m256i *in, const int stride,
      56             :                                      uint8_t *output, int num) {
      57           0 :   const __m256i rounding = _mm256_set1_epi16(1 << (IDCT_ROUNDING_POS - 1));
      58           0 :   int i = 0;
      59             : 
      60           0 :   while (i < num) {
      61           0 :     in[i] = _mm256_adds_epi16(in[i], rounding);
      62           0 :     in[i] = _mm256_srai_epi16(in[i], IDCT_ROUNDING_POS);
      63           0 :     recon_and_store(&in[i], output + i * stride);
      64           0 :     i += 1;
      65             :   }
      66           0 : }
      67             : 
      68           0 : static INLINE void unpack_butter_fly(const __m256i *a0, const __m256i *a1,
      69             :                                      const __m256i *c0, const __m256i *c1,
      70             :                                      __m256i *b0, __m256i *b1) {
      71             :   __m256i x0, x1;
      72           0 :   x0 = _mm256_unpacklo_epi16(*a0, *a1);
      73           0 :   x1 = _mm256_unpackhi_epi16(*a0, *a1);
      74           0 :   *b0 = butter_fly(&x0, &x1, c0);
      75           0 :   *b1 = butter_fly(&x0, &x1, c1);
      76           0 : }
      77             : 
      78             : void av1_idct16_avx2(__m256i *in);
      79             : 
      80             : #endif  // AOM_DSP_X86_INV_TXFM_COMMON_AVX2_H

Generated by: LCOV version 1.13