LCOV - code coverage report
Current view: top level - third_party/aom/aom_dsp/simd - v64_intrinsics_x86.h (source / functions)
Test:         output.info
Date:         2017-07-14 16:53:18
                              Hit    Total    Coverage
  Lines:                        0        5       0.0 %
  Functions:                    0        0           -
Legend: Lines: hit | not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #ifndef _V64_INTRINSICS_H
      13             : #define _V64_INTRINSICS_H
      14             : 
      15             : #include <emmintrin.h>
      16             : #if defined(__SSSE3__)
      17             : #include <tmmintrin.h>
      18             : #endif
      19             : #if defined(__SSE4_1__)
      20             : #include <smmintrin.h>
      21             : #endif
      22             : 
      23             : typedef __m128i v64;
      24             : 
      25             : SIMD_INLINE uint32_t v64_low_u32(v64 a) {
      26             :   return (uint32_t)_mm_cvtsi128_si32(a);
      27             : }
      28             : 
      29             : SIMD_INLINE uint32_t v64_high_u32(v64 a) {
      30             :   return (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(a, 4));
      31             : }
      32             : 
      33             : SIMD_INLINE int32_t v64_low_s32(v64 a) { return (int32_t)_mm_cvtsi128_si32(a); }
      34             : 
      35             : SIMD_INLINE int32_t v64_high_s32(v64 a) {
      36             :   return (int32_t)_mm_cvtsi128_si32(_mm_srli_si128(a, 4));
      37             : }
      38             : 
      39             : SIMD_INLINE v64 v64_from_16(uint16_t a, uint16_t b, uint16_t c, uint16_t d) {
      40             :   return _mm_packs_epi32(
      41             :       _mm_set_epi32((int16_t)a, (int16_t)b, (int16_t)c, (int16_t)d),
      42             :       _mm_setzero_si128());
      43             : }
      44             : 
      45             : SIMD_INLINE v64 v64_from_32(uint32_t x, uint32_t y) {
      46             :   return _mm_set_epi32(0, 0, x, y);
      47             : }
      48             : 
      49             : SIMD_INLINE v64 v64_from_64(uint64_t x) {
      50             : #ifdef __x86_64__
      51             :   return _mm_cvtsi64_si128(x);
      52             : #else
      53             :   return _mm_set_epi32(0, 0, x >> 32, (uint32_t)x);
      54             : #endif
      55             : }
      56             : 
      57             : SIMD_INLINE uint64_t v64_u64(v64 x) {
      58             :   return (uint64_t)v64_low_u32(x) | ((uint64_t)v64_high_u32(x) << 32);
      59             : }
      60             : 
      61             : SIMD_INLINE uint32_t u32_load_aligned(const void *p) {
      62             :   return *((uint32_t *)p);
      63             : }
      64             : 
      65             : SIMD_INLINE uint32_t u32_load_unaligned(const void *p) {
      66             :   return *((uint32_t *)p);
      67             : }
      68             : 
      69             : SIMD_INLINE void u32_store_aligned(void *p, uint32_t a) {
      70           0 :   *((uint32_t *)p) = a;
      71             : }
      72             : 
      73             : SIMD_INLINE void u32_store_unaligned(void *p, uint32_t a) {
      74           0 :   *((uint32_t *)p) = a;
      75             : }
      76             : 
      77             : SIMD_INLINE v64 v64_load_aligned(const void *p) {
      78           0 :   return _mm_loadl_epi64((__m128i *)p);
      79             : }
      80             : 
      81             : SIMD_INLINE v64 v64_load_unaligned(const void *p) {
      82           0 :   return _mm_loadl_epi64((__m128i *)p);
      83             : }
      84             : 
      85             : SIMD_INLINE void v64_store_aligned(void *p, v64 a) {
      86             :   _mm_storel_epi64((__m128i *)p, a);
      87             : }
      88             : 
      89             : SIMD_INLINE void v64_store_unaligned(void *p, v64 a) {
      90             :   _mm_storel_epi64((__m128i *)p, a);
      91             : }
      92             : 
       93             : // The following macro requires an immediate (compile-time constant) for c.
      94             : #if defined(__OPTIMIZE__) && __OPTIMIZE__
      95             : #define v64_align(a, b, c) \
      96             :   ((c) ? _mm_srli_si128(_mm_unpacklo_epi64(b, a), (c)) : b)
      97             : #else
      98             : #define v64_align(a, b, c)                                                  \
      99             :   ((c) ? v64_from_64((v64_u64(b) >> (c)*8) | (v64_u64(a) << (8 - (c)) * 8)) \
     100             :        : (b))
     101             : #endif
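
As an illustrative sketch (an editor's addition, not part of the instrumented file): v64_align(a, b, c) yields the 8 bytes starting at byte offset c of the 16-byte concatenation that has b in the low half and a in the high half, so the optimized and fallback branches above agree. Assuming the header is included through the aom SIMD wrappers that define SIMD_INLINE:

    v64 lo = v64_from_64(0x0807060504030201ULL);  /* bytes 0x01..0x08 */
    v64 hi = v64_from_64(0x100f0e0d0c0b0a09ULL);  /* bytes 0x09..0x10 */
    /* Take 8 bytes starting 3 bytes into (hi:lo); c must be a literal. */
    v64 r = v64_align(hi, lo, 3);
    /* v64_u64(r) == 0x0b0a090807060504 */
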
     102             : 
     103           0 : SIMD_INLINE v64 v64_zero() { return _mm_setzero_si128(); }
     104             : 
     105             : SIMD_INLINE v64 v64_dup_8(uint8_t x) { return _mm_set1_epi8(x); }
     106             : 
     107             : SIMD_INLINE v64 v64_dup_16(uint16_t x) { return _mm_set1_epi16(x); }
     108             : 
     109             : SIMD_INLINE v64 v64_dup_32(uint32_t x) { return _mm_set1_epi32(x); }
     110             : 
     111             : SIMD_INLINE v64 v64_add_8(v64 a, v64 b) { return _mm_add_epi8(a, b); }
     112             : 
     113             : SIMD_INLINE v64 v64_add_16(v64 a, v64 b) { return _mm_add_epi16(a, b); }
     114             : 
     115             : SIMD_INLINE v64 v64_sadd_s16(v64 a, v64 b) { return _mm_adds_epi16(a, b); }
     116             : 
     117             : SIMD_INLINE v64 v64_add_32(v64 a, v64 b) { return _mm_add_epi32(a, b); }
     118             : 
     119             : SIMD_INLINE v64 v64_sub_8(v64 a, v64 b) { return _mm_sub_epi8(a, b); }
     120             : 
     121             : SIMD_INLINE v64 v64_ssub_u8(v64 a, v64 b) { return _mm_subs_epu8(a, b); }
     122             : 
     123             : SIMD_INLINE v64 v64_ssub_s8(v64 a, v64 b) { return _mm_subs_epi8(a, b); }
     124             : 
     125             : SIMD_INLINE v64 v64_sub_16(v64 a, v64 b) { return _mm_sub_epi16(a, b); }
     126             : 
     127             : SIMD_INLINE v64 v64_ssub_s16(v64 a, v64 b) { return _mm_subs_epi16(a, b); }
     128             : 
     129             : SIMD_INLINE v64 v64_ssub_u16(v64 a, v64 b) { return _mm_subs_epu16(a, b); }
     130             : 
     131             : SIMD_INLINE v64 v64_sub_32(v64 a, v64 b) { return _mm_sub_epi32(a, b); }
     132             : 
     133             : SIMD_INLINE v64 v64_abs_s16(v64 a) {
     134             : #if defined(__SSSE3__)
     135             :   return _mm_abs_epi16(a);
     136             : #else
     137             :   return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a));
     138             : #endif
     139             : }
     140             : 
     141             : SIMD_INLINE v64 v64_abs_s8(v64 a) {
     142             : #if defined(__SSSE3__)
     143             :   return _mm_abs_epi8(a);
     144             : #else
     145             :   v64 sign = _mm_cmplt_epi8(a, _mm_setzero_si128());
     146             :   return _mm_xor_si128(sign, _mm_add_epi8(a, sign));
     147             : #endif
     148             : }
     149             : 
     150             : SIMD_INLINE v64 v64_ziplo_8(v64 a, v64 b) { return _mm_unpacklo_epi8(b, a); }
     151             : 
     152             : SIMD_INLINE v64 v64_ziphi_8(v64 a, v64 b) {
     153             :   return _mm_srli_si128(_mm_unpacklo_epi8(b, a), 8);
     154             : }
     155             : 
     156             : SIMD_INLINE v64 v64_ziplo_16(v64 a, v64 b) { return _mm_unpacklo_epi16(b, a); }
     157             : 
     158             : SIMD_INLINE v64 v64_ziphi_16(v64 a, v64 b) {
     159             :   return _mm_srli_si128(_mm_unpacklo_epi16(b, a), 8);
     160             : }
     161             : 
     162             : SIMD_INLINE v64 v64_ziplo_32(v64 a, v64 b) { return _mm_unpacklo_epi32(b, a); }
     163             : 
     164             : SIMD_INLINE v64 v64_ziphi_32(v64 a, v64 b) {
     165             :   return _mm_srli_si128(_mm_unpacklo_epi32(b, a), 8);
     166             : }
     167             : 
     168             : SIMD_INLINE v64 v64_pack_s32_s16(v64 a, v64 b) {
     169             :   __m128i t = _mm_unpacklo_epi64(b, a);
     170             :   return _mm_packs_epi32(t, t);
     171             : }
     172             : 
     173             : SIMD_INLINE v64 v64_pack_s16_u8(v64 a, v64 b) {
     174             :   __m128i t = _mm_unpacklo_epi64(b, a);
     175             :   return _mm_packus_epi16(t, t);
     176             : }
     177             : 
     178             : SIMD_INLINE v64 v64_pack_s16_s8(v64 a, v64 b) {
     179             :   __m128i t = _mm_unpacklo_epi64(b, a);
     180             :   return _mm_packs_epi16(t, t);
     181             : }
     182             : 
     183             : SIMD_INLINE v64 v64_unziphi_8(v64 a, v64 b) {
     184             : #if defined(__SSSE3__)
     185             :   return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
     186             :                           v64_from_64(0x0f0d0b0907050301LL));
     187             : #else
     188             :   return _mm_packus_epi16(
     189             :       _mm_unpacklo_epi64(_mm_srli_epi16(b, 8), _mm_srli_epi16(a, 8)),
     190             :       _mm_setzero_si128());
     191             : #endif
     192             : }
     193             : 
     194             : SIMD_INLINE v64 v64_unziplo_8(v64 a, v64 b) {
     195             : #if defined(__SSSE3__)
     196             :   return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
     197             :                           v64_from_64(0x0e0c0a0806040200LL));
     198             : #else
     199             :   return v64_unziphi_8(_mm_slli_si128(a, 1), _mm_slli_si128(b, 1));
     200             : #endif
     201             : }
     202             : 
     203             : SIMD_INLINE v64 v64_unziphi_16(v64 a, v64 b) {
     204             : #if defined(__SSSE3__)
     205             :   return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
     206             :                           v64_from_64(0x0f0e0b0a07060302LL));
     207             : #else
     208             :   return _mm_packs_epi32(
     209             :       _mm_unpacklo_epi64(_mm_srai_epi32(b, 16), _mm_srai_epi32(a, 16)),
     210             :       _mm_setzero_si128());
     211             : #endif
     212             : }
     213             : 
     214             : SIMD_INLINE v64 v64_unziplo_16(v64 a, v64 b) {
     215             : #if defined(__SSSE3__)
     216             :   return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
     217             :                           v64_from_64(0x0d0c090805040100LL));
     218             : #else
     219             :   return v64_unziphi_16(_mm_slli_si128(a, 2), _mm_slli_si128(b, 2));
     220             : #endif
     221             : }
     222             : 
     223             : SIMD_INLINE v64 v64_unpacklo_u8_s16(v64 a) {
     224             :   return _mm_unpacklo_epi8(a, _mm_setzero_si128());
     225             : }
     226             : 
     227             : SIMD_INLINE v64 v64_unpackhi_u8_s16(v64 a) {
     228             :   return _mm_srli_si128(_mm_unpacklo_epi8(a, _mm_setzero_si128()), 8);
     229             : }
     230             : 
     231             : SIMD_INLINE v64 v64_unpacklo_s8_s16(v64 a) {
     232             :   return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8);
     233             : }
     234             : 
     235             : SIMD_INLINE v64 v64_unpackhi_s8_s16(v64 a) {
     236             :   return _mm_srli_si128(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8), 8);
     237             : }
     238             : 
     239             : SIMD_INLINE v64 v64_unpacklo_u16_s32(v64 a) {
     240             :   return _mm_unpacklo_epi16(a, _mm_setzero_si128());
     241             : }
     242             : 
     243             : SIMD_INLINE v64 v64_unpacklo_s16_s32(v64 a) {
     244             :   return _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), a), 16);
     245             : }
     246             : 
     247             : SIMD_INLINE v64 v64_unpackhi_u16_s32(v64 a) {
     248             :   return _mm_srli_si128(_mm_unpacklo_epi16(a, _mm_setzero_si128()), 8);
     249             : }
     250             : 
     251             : SIMD_INLINE v64 v64_unpackhi_s16_s32(v64 a) {
     252             :   return _mm_srli_si128(
     253             :       _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), a), 16), 8);
     254             : }
     255             : 
     256             : SIMD_INLINE v64 v64_shuffle_8(v64 x, v64 pattern) {
     257             : #if defined(__SSSE3__)
     258             :   return _mm_shuffle_epi8(x, pattern);
     259             : #else
     260             :   v64 output;
     261             :   unsigned char *input = (unsigned char *)&x;
     262             :   unsigned char *index = (unsigned char *)&pattern;
     263             :   char *selected = (char *)&output;
     264             :   int counter;
     265             : 
     266             :   for (counter = 0; counter < 8; counter++) {
     267             :     selected[counter] = input[index[counter]];
     268             :   }
     269             : 
     270             :   return output;
     271             : #endif
     272             : }
     273             : 
     274             : SIMD_INLINE int64_t v64_dotp_su8(v64 a, v64 b) {
     275             :   __m128i r, r1, r2, z;
     276             :   z = _mm_setzero_si128();
     277             :   r1 = _mm_madd_epi16(_mm_slli_epi16(_mm_unpacklo_epi8(a, z), 8),
     278             :                       _mm_unpacklo_epi8(b, z));
     279             :   r2 = _mm_srli_si128(r1, 8);
     280             :   r = _mm_add_epi32(r1, r2);
     281             :   r = _mm_add_epi32(r, _mm_srli_si128(r, 4));
     282             :   return ((int32_t)v64_low_u32(r)) >> 8;
     283             : }
     284             : 
     285             : SIMD_INLINE int64_t v64_dotp_s16(v64 a, v64 b) {
     286             :   __m128i r = _mm_madd_epi16(a, b);
     287             : #if defined(__SSE4_1__) && defined(__x86_64__)
     288             :   __m128i x = _mm_cvtepi32_epi64(r);
     289             :   return _mm_cvtsi128_si64(_mm_add_epi64(x, _mm_srli_si128(x, 8)));
     290             : #else
     291             :   return (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 4)) +
     292             :          (int64_t)_mm_cvtsi128_si32(r);
     293             : #endif
     294             : }
     295             : 
     296             : SIMD_INLINE uint64_t v64_hadd_u8(v64 a) {
     297             :   return v64_low_u32(_mm_sad_epu8(a, _mm_setzero_si128()));
     298             : }
     299             : 
     300             : SIMD_INLINE int64_t v64_hadd_s16(v64 a) {
     301             :   return v64_dotp_s16(a, v64_dup_16(1));
     302             : }
     303             : 
     304             : typedef v64 sad64_internal;
     305             : 
     306             : SIMD_INLINE sad64_internal v64_sad_u8_init() { return _mm_setzero_si128(); }
     307             : 
     308             : /* Implementation dependent return value.  Result must be finalised with
     309             :    v64_sad_u8_sum().
     310             :    The result for more than 32 v64_sad_u8() calls is undefined. */
     311             : SIMD_INLINE sad64_internal v64_sad_u8(sad64_internal s, v64 a, v64 b) {
     312             :   return _mm_add_epi64(s, _mm_sad_epu8(a, b));
     313             : }
     314             : 
     315             : SIMD_INLINE uint32_t v64_sad_u8_sum(sad64_internal s) { return v64_low_u32(s); }
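
A usage sketch (editor's addition, not part of the instrumented file): a plain 8x8 SAD built on this accumulator API. sad_8x8 and its pointer/stride parameters are hypothetical names, and eight accumulation calls stay well under the 32-call limit noted above.

    static uint32_t sad_8x8(const uint8_t *src, int src_stride,
                            const uint8_t *ref, int ref_stride) {
      sad64_internal s = v64_sad_u8_init();
      for (int i = 0; i < 8; i++)    /* one 8-byte row per call */
        s = v64_sad_u8(s, v64_load_unaligned(src + i * src_stride),
                       v64_load_unaligned(ref + i * ref_stride));
      return v64_sad_u8_sum(s);      /* finalise the accumulated sum */
    }
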
     316             : 
     317             : typedef v64 ssd64_internal;
     318             : 
     319             : SIMD_INLINE ssd64_internal v64_ssd_u8_init() { return _mm_setzero_si128(); }
     320             : 
     321             : /* Implementation dependent return value.  Result must be finalised with
     322             :  * v64_ssd_u8_sum(). */
     323             : SIMD_INLINE ssd64_internal v64_ssd_u8(ssd64_internal s, v64 a, v64 b) {
     324             :   v64 l = v64_sub_16(v64_ziplo_8(v64_zero(), a), v64_ziplo_8(v64_zero(), b));
     325             :   v64 h = v64_sub_16(v64_ziphi_8(v64_zero(), a), v64_ziphi_8(v64_zero(), b));
     326             :   v64 r = v64_add_32(_mm_madd_epi16(l, l), _mm_madd_epi16(h, h));
     327             :   return _mm_add_epi64(
     328             :       s, v64_ziplo_32(v64_zero(), _mm_add_epi32(r, _mm_srli_si128(r, 4))));
     329             : }
     330             : 
      331             : SIMD_INLINE uint32_t v64_ssd_u8_sum(ssd64_internal s) { return v64_low_u32(s); }
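
The SSD accumulator follows the same init / accumulate / sum pattern; a minimal sketch (editor's addition) for a single pair of 8-byte rows, with src and ref again standing in for caller-provided pointers:

    ssd64_internal t = v64_ssd_u8_init();
    t = v64_ssd_u8(t, v64_load_unaligned(src), v64_load_unaligned(ref));
    uint32_t ssd = v64_ssd_u8_sum(t);  /* sum of squared 8-bit differences */
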
     332             : 
     333             : SIMD_INLINE v64 v64_or(v64 a, v64 b) { return _mm_or_si128(a, b); }
     334             : 
     335             : SIMD_INLINE v64 v64_xor(v64 a, v64 b) { return _mm_xor_si128(a, b); }
     336             : 
     337             : SIMD_INLINE v64 v64_and(v64 a, v64 b) { return _mm_and_si128(a, b); }
     338             : 
     339             : SIMD_INLINE v64 v64_andn(v64 a, v64 b) { return _mm_andnot_si128(b, a); }
     340             : 
     341             : SIMD_INLINE v64 v64_mullo_s16(v64 a, v64 b) { return _mm_mullo_epi16(a, b); }
     342             : 
     343             : SIMD_INLINE v64 v64_mulhi_s16(v64 a, v64 b) { return _mm_mulhi_epi16(a, b); }
     344             : 
     345             : SIMD_INLINE v64 v64_mullo_s32(v64 a, v64 b) {
     346             : #if defined(__SSE4_1__)
     347             :   return _mm_mullo_epi32(a, b);
     348             : #else
     349             :   return _mm_unpacklo_epi32(
     350             :       _mm_mul_epu32(a, b),
     351             :       _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)));
     352             : #endif
     353             : }
     354             : 
     355             : SIMD_INLINE v64 v64_madd_s16(v64 a, v64 b) { return _mm_madd_epi16(a, b); }
     356             : 
     357             : SIMD_INLINE v64 v64_madd_us8(v64 a, v64 b) {
     358             : #if defined(__SSSE3__)
     359             :   return _mm_maddubs_epi16(a, b);
     360             : #else
     361             :   __m128i t = _mm_madd_epi16(_mm_unpacklo_epi8(a, _mm_setzero_si128()),
     362             :                              _mm_srai_epi16(_mm_unpacklo_epi8(b, b), 8));
     363             :   return _mm_packs_epi32(t, t);
     364             : #endif
     365             : }
     366             : 
     367             : SIMD_INLINE v64 v64_avg_u8(v64 a, v64 b) { return _mm_avg_epu8(a, b); }
     368             : 
     369             : SIMD_INLINE v64 v64_rdavg_u8(v64 a, v64 b) {
     370             :   return _mm_sub_epi8(_mm_avg_epu8(a, b),
     371             :                       _mm_and_si128(_mm_xor_si128(a, b), v64_dup_8(1)));
     372             : }
     373             : 
     374             : SIMD_INLINE v64 v64_avg_u16(v64 a, v64 b) { return _mm_avg_epu16(a, b); }
     375             : 
     376             : SIMD_INLINE v64 v64_min_u8(v64 a, v64 b) { return _mm_min_epu8(a, b); }
     377             : 
     378             : SIMD_INLINE v64 v64_max_u8(v64 a, v64 b) { return _mm_max_epu8(a, b); }
     379             : 
     380             : SIMD_INLINE v64 v64_min_s8(v64 a, v64 b) {
     381             : #if defined(__SSE4_1__)
     382             :   return _mm_min_epi8(a, b);
     383             : #else
     384             :   v64 mask = _mm_cmplt_epi8(a, b);
     385             :   return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
     386             : #endif
     387             : }
     388             : 
     389             : SIMD_INLINE v64 v64_max_s8(v64 a, v64 b) {
     390             : #if defined(__SSE4_1__)
     391             :   return _mm_max_epi8(a, b);
     392             : #else
     393             :   v64 mask = _mm_cmplt_epi8(b, a);
     394             :   return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
     395             : #endif
     396             : }
     397             : 
     398             : SIMD_INLINE v64 v64_min_s16(v64 a, v64 b) { return _mm_min_epi16(a, b); }
     399             : 
     400             : SIMD_INLINE v64 v64_max_s16(v64 a, v64 b) { return _mm_max_epi16(a, b); }
     401             : 
     402             : SIMD_INLINE v64 v64_cmpgt_s8(v64 a, v64 b) { return _mm_cmpgt_epi8(a, b); }
     403             : 
     404             : SIMD_INLINE v64 v64_cmplt_s8(v64 a, v64 b) { return _mm_cmplt_epi8(a, b); }
     405             : 
     406             : SIMD_INLINE v64 v64_cmpeq_8(v64 a, v64 b) { return _mm_cmpeq_epi8(a, b); }
     407             : 
     408             : SIMD_INLINE v64 v64_cmpgt_s16(v64 a, v64 b) { return _mm_cmpgt_epi16(a, b); }
     409             : 
     410             : SIMD_INLINE v64 v64_cmplt_s16(v64 a, v64 b) { return _mm_cmplt_epi16(a, b); }
     411             : 
     412             : SIMD_INLINE v64 v64_cmpeq_16(v64 a, v64 b) { return _mm_cmpeq_epi16(a, b); }
     413             : 
     414             : SIMD_INLINE v64 v64_shl_8(v64 a, unsigned int c) {
     415             :   return _mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << c)),
     416             :                        _mm_sll_epi16(a, _mm_cvtsi32_si128(c)));
     417             : }
     418             : 
     419             : SIMD_INLINE v64 v64_shr_u8(v64 a, unsigned int c) {
     420             :   return _mm_and_si128(_mm_set1_epi8(0xff >> c),
     421             :                        _mm_srl_epi16(a, _mm_cvtsi32_si128(c)));
     422             : }
     423             : 
     424             : SIMD_INLINE v64 v64_shr_s8(v64 a, unsigned int c) {
     425             :   return _mm_packs_epi16(
     426             :       _mm_sra_epi16(_mm_unpacklo_epi8(a, a), _mm_cvtsi32_si128(c + 8)), a);
     427             : }
     428             : 
     429             : SIMD_INLINE v64 v64_shl_16(v64 a, unsigned int c) {
     430             :   return _mm_sll_epi16(a, _mm_cvtsi32_si128(c));
     431             : }
     432             : 
     433             : SIMD_INLINE v64 v64_shr_u16(v64 a, unsigned int c) {
     434             :   return _mm_srl_epi16(a, _mm_cvtsi32_si128(c));
     435             : }
     436             : 
     437             : SIMD_INLINE v64 v64_shr_s16(v64 a, unsigned int c) {
     438             :   return _mm_sra_epi16(a, _mm_cvtsi32_si128(c));
     439             : }
     440             : 
     441             : SIMD_INLINE v64 v64_shl_32(v64 a, unsigned int c) {
     442             :   return _mm_sll_epi32(a, _mm_cvtsi32_si128(c));
     443             : }
     444             : 
     445             : SIMD_INLINE v64 v64_shr_u32(v64 a, unsigned int c) {
     446             :   return _mm_srl_epi32(a, _mm_cvtsi32_si128(c));
     447             : }
     448             : 
     449             : SIMD_INLINE v64 v64_shr_s32(v64 a, unsigned int c) {
     450             :   return _mm_sra_epi32(a, _mm_cvtsi32_si128(c));
     451             : }
     452             : 
     453             : /* These intrinsics require immediate values, so we must use #defines
     454             :    to enforce that. */
     455             : #define v64_shl_n_byte(a, c) _mm_slli_si128(a, c)
     456             : #define v64_shr_n_byte(a, c) _mm_srli_si128(_mm_unpacklo_epi64(a, a), c + 8)
     457             : #define v64_shl_n_8(a, c) \
     458             :   _mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << (c))), _mm_slli_epi16(a, c))
     459             : #define v64_shr_n_u8(a, c) \
     460             :   _mm_and_si128(_mm_set1_epi8(0xff >> (c)), _mm_srli_epi16(a, c))
     461             : #define v64_shr_n_s8(a, c) \
     462             :   _mm_packs_epi16(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), (c) + 8), a)
     463             : #define v64_shl_n_16(a, c) _mm_slli_epi16(a, c)
     464             : #define v64_shr_n_u16(a, c) _mm_srli_epi16(a, c)
     465             : #define v64_shr_n_s16(a, c) _mm_srai_epi16(a, c)
     466             : #define v64_shl_n_32(a, c) _mm_slli_epi32(a, c)
     467             : #define v64_shr_n_u32(a, c) _mm_srli_epi32(a, c)
     468             : #define v64_shr_n_s32(a, c) _mm_srai_epi32(a, c)
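
As a usage sketch (editor's addition): the _n_ variants above require a literal shift count so the compiler can emit the immediate-form instructions, while the earlier non-_n_ functions accept a run-time count; shift_amount below is a hypothetical run-time variable.

    v64 x = v64_dup_16(100);
    v64 a = v64_shr_n_u16(x, 2);           /* count must be a literal: lanes become 25 */
    v64 b = v64_shr_u16(x, shift_amount);  /* run-time count is fine here */
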
     469             : 
     470             : #endif /* _V64_INTRINSICS_H */

Generated by: LCOV version 1.13