LCOV - code coverage report
Current view: top level - third_party/aom/aom_dsp/simd - v128_intrinsics_x86.h (source / functions)
Test:         output.info
Date:         2017-07-14 16:53:18
                          Hit    Total    Coverage
              Lines:        0       54       0.0 %
              Functions:    0        0           -
Legend: Lines: hit | not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #ifndef _V128_INTRINSICS_H
      13             : #define _V128_INTRINSICS_H
      14             : 
      15             : #include "./v64_intrinsics_x86.h"
      16             : 
      17             : typedef __m128i v128;
      18             : 
      19             : SIMD_INLINE uint32_t v128_low_u32(v128 a) {
      20           0 :   return (uint32_t)_mm_cvtsi128_si32(a);
      21             : }
      22             : 
      23             : SIMD_INLINE v64 v128_low_v64(v128 a) {
      24           0 :   return _mm_unpacklo_epi64(a, v64_zero());
      25             : }
      26             : 
      27           0 : SIMD_INLINE v64 v128_high_v64(v128 a) { return _mm_srli_si128(a, 8); }
      28             : 
      29             : SIMD_INLINE v128 v128_from_v64(v64 a, v64 b) {
      30           0 :   return _mm_unpacklo_epi64(b, a);
      31             : }
      32             : 
      33             : SIMD_INLINE v128 v128_from_64(uint64_t a, uint64_t b) {
      34             :   return v128_from_v64(v64_from_64(a), v64_from_64(b));
      35             : }
      36             : 
      37             : SIMD_INLINE v128 v128_from_32(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
      38           0 :   return _mm_set_epi32(a, b, c, d);
      39             : }
      40             : 
      41             : SIMD_INLINE v128 v128_load_aligned(const void *p) {
      42           0 :   return _mm_load_si128((__m128i *)p);
      43             : }
      44             : 
      45             : SIMD_INLINE v128 v128_load_unaligned(const void *p) {
      46             : #if defined(__SSSE3__)
      47           0 :   return (__m128i)_mm_lddqu_si128((__m128i *)p);
      48             : #else
      49           0 :   return _mm_loadu_si128((__m128i *)p);
      50             : #endif
      51             : }
      52             : 
      53             : SIMD_INLINE void v128_store_aligned(void *p, v128 a) {
      54             :   _mm_store_si128((__m128i *)p, a);
      55             : }
      56             : 
      57             : SIMD_INLINE void v128_store_unaligned(void *p, v128 a) {
      58             :   _mm_storeu_si128((__m128i *)p, a);
      59             : }
      60             : 
      61             : // The following function requires an immediate.
      62             : // Some compilers will check this during optimisation, others won't.
      63             : #if defined(__OPTIMIZE__) && __OPTIMIZE__ && !defined(__clang__)
      64             : #if defined(__SSSE3__)
      65             : SIMD_INLINE v128 v128_align(v128 a, v128 b, unsigned int c) {
      66             :   return c ? _mm_alignr_epi8(a, b, c) : b;
      67             : }
      68             : #else
      69             : #define v128_align(a, b, c) \
      70             :   ((c) ? _mm_or_si128(_mm_srli_si128(b, c), _mm_slli_si128(a, 16 - (c))) : (b))
      71             : #endif
      72             : #else
      73             : #if defined(__SSSE3__)
      74             : #define v128_align(a, b, c) ((c) ? _mm_alignr_epi8(a, b, c) : (b))
      75             : #else
      76             : #define v128_align(a, b, c) \
      77             :   ((c) ? _mm_or_si128(_mm_srli_si128(b, c), _mm_slli_si128(a, 16 - (c))) : (b))
      78             : #endif
      79             : #endif
      80             : 
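A minimal usage sketch of v128_align (not part of the covered file; the helper
name merge_tail is hypothetical): the byte count must be a literal so that it
reaches _mm_alignr_epi8 / _mm_srli_si128 / _mm_slli_si128 as an immediate,
which is the constraint the comment and the #if ladder above deal with.

    /* Bytes 4..19 of the 256-bit pair (next:cur), i.e. "cur" shifted right by
       4 bytes with the low bytes of "next" shifted into the top. */
    SIMD_INLINE v128 merge_tail(v128 next, v128 cur) {
      return v128_align(next, cur, 4); /* literal immediate, never a variable */
    }
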
      81           0 : SIMD_INLINE v128 v128_zero() { return _mm_setzero_si128(); }
      82             : 
      83           0 : SIMD_INLINE v128 v128_dup_8(uint8_t x) { return _mm_set1_epi8(x); }
      84             : 
      85           0 : SIMD_INLINE v128 v128_dup_16(uint16_t x) { return _mm_set1_epi16(x); }
      86             : 
      87           0 : SIMD_INLINE v128 v128_dup_32(uint32_t x) { return _mm_set1_epi32(x); }
      88             : 
      89           0 : SIMD_INLINE v128 v128_add_8(v128 a, v128 b) { return _mm_add_epi8(a, b); }
      90             : 
      91           0 : SIMD_INLINE v128 v128_add_16(v128 a, v128 b) { return _mm_add_epi16(a, b); }
      92             : 
      93             : SIMD_INLINE v128 v128_sadd_s16(v128 a, v128 b) { return _mm_adds_epi16(a, b); }
      94             : 
      95           0 : SIMD_INLINE v128 v128_add_32(v128 a, v128 b) { return _mm_add_epi32(a, b); }
      96             : 
      97             : SIMD_INLINE v128 v128_padd_s16(v128 a) {
      98             :   return _mm_madd_epi16(a, _mm_set1_epi16(1));
      99             : }
     100             : 
     101             : SIMD_INLINE v128 v128_sub_8(v128 a, v128 b) { return _mm_sub_epi8(a, b); }
     102             : 
     103           0 : SIMD_INLINE v128 v128_ssub_u8(v128 a, v128 b) { return _mm_subs_epu8(a, b); }
     104             : 
     105             : SIMD_INLINE v128 v128_ssub_s8(v128 a, v128 b) { return _mm_subs_epi8(a, b); }
     106             : 
     107           0 : SIMD_INLINE v128 v128_sub_16(v128 a, v128 b) { return _mm_sub_epi16(a, b); }
     108             : 
     109             : SIMD_INLINE v128 v128_ssub_s16(v128 a, v128 b) { return _mm_subs_epi16(a, b); }
     110             : 
     111           0 : SIMD_INLINE v128 v128_ssub_u16(v128 a, v128 b) { return _mm_subs_epu16(a, b); }
     112             : 
     113             : SIMD_INLINE v128 v128_sub_32(v128 a, v128 b) { return _mm_sub_epi32(a, b); }
     114             : 
     115             : SIMD_INLINE v128 v128_abs_s16(v128 a) {
     116             : #if defined(__SSSE3__)
     117           0 :   return _mm_abs_epi16(a);
     118             : #else
     119           0 :   return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a));
     120             : #endif
     121             : }
     122             : 
     123             : SIMD_INLINE v128 v128_abs_s8(v128 a) {
     124             : #if defined(__SSSE3__)
     125           0 :   return _mm_abs_epi8(a);
     126             : #else
     127           0 :   v128 sign = _mm_cmplt_epi8(a, _mm_setzero_si128());
     128           0 :   return _mm_xor_si128(sign, _mm_add_epi8(a, sign));
     129             : #endif
     130             : }
     131             : 
     132             : SIMD_INLINE v128 v128_ziplo_8(v128 a, v128 b) {
     133             :   return _mm_unpacklo_epi8(b, a);
     134             : }
     135             : 
     136             : SIMD_INLINE v128 v128_ziphi_8(v128 a, v128 b) {
     137             :   return _mm_unpackhi_epi8(b, a);
     138             : }
     139             : 
     140             : SIMD_INLINE v128 v128_ziplo_16(v128 a, v128 b) {
     141           0 :   return _mm_unpacklo_epi16(b, a);
     142             : }
     143             : 
     144             : SIMD_INLINE v128 v128_ziphi_16(v128 a, v128 b) {
     145           0 :   return _mm_unpackhi_epi16(b, a);
     146             : }
     147             : 
     148             : SIMD_INLINE v128 v128_ziplo_32(v128 a, v128 b) {
     149           0 :   return _mm_unpacklo_epi32(b, a);
     150             : }
     151             : 
     152             : SIMD_INLINE v128 v128_ziphi_32(v128 a, v128 b) {
     153           0 :   return _mm_unpackhi_epi32(b, a);
     154             : }
     155             : 
     156             : SIMD_INLINE v128 v128_ziplo_64(v128 a, v128 b) {
     157           0 :   return _mm_unpacklo_epi64(b, a);
     158             : }
     159             : 
     160             : SIMD_INLINE v128 v128_ziphi_64(v128 a, v128 b) {
     161           0 :   return _mm_unpackhi_epi64(b, a);
     162             : }
     163             : 
     164             : SIMD_INLINE v128 v128_zip_8(v64 a, v64 b) { return _mm_unpacklo_epi8(b, a); }
     165             : 
     166             : SIMD_INLINE v128 v128_zip_16(v64 a, v64 b) { return _mm_unpacklo_epi16(b, a); }
     167             : 
     168             : SIMD_INLINE v128 v128_zip_32(v64 a, v64 b) { return _mm_unpacklo_epi32(b, a); }
     169             : 
     170             : SIMD_INLINE v128 v128_unziphi_8(v128 a, v128 b) {
     171             :   return _mm_packs_epi16(_mm_srai_epi16(b, 8), _mm_srai_epi16(a, 8));
     172             : }
     173             : 
     174             : SIMD_INLINE v128 v128_unziplo_8(v128 a, v128 b) {
     175             : #if defined(__SSSE3__)
     176             : #ifdef __x86_64__
     177             :   v128 order = _mm_cvtsi64_si128(0x0e0c0a0806040200LL);
     178             : #else
     179             :   v128 order = _mm_set_epi32(0, 0, 0x0e0c0a08, 0x06040200);
     180             : #endif
     181             :   return _mm_unpacklo_epi64(_mm_shuffle_epi8(b, order),
     182             :                             _mm_shuffle_epi8(a, order));
     183             : #else
     184             :   return v128_unziphi_8(_mm_slli_si128(a, 1), _mm_slli_si128(b, 1));
     185             : #endif
     186             : }
     187             : 
     188             : SIMD_INLINE v128 v128_unziphi_16(v128 a, v128 b) {
     189             :   return _mm_packs_epi32(_mm_srai_epi32(b, 16), _mm_srai_epi32(a, 16));
     190             : }
     191             : 
     192             : SIMD_INLINE v128 v128_unziplo_16(v128 a, v128 b) {
     193             : #if defined(__SSSE3__)
     194             : #ifdef __x86_64__
     195             :   v128 order = _mm_cvtsi64_si128(0x0d0c090805040100LL);
     196             : #else
     197             :   v128 order = _mm_set_epi32(0, 0, 0x0d0c0908, 0x05040100);
     198             : #endif
     199             :   return _mm_unpacklo_epi64(_mm_shuffle_epi8(b, order),
     200             :                             _mm_shuffle_epi8(a, order));
     201             : #else
     202             :   return v128_unziphi_16(_mm_slli_si128(a, 2), _mm_slli_si128(b, 2));
     203             : #endif
     204             : }
     205             : 
     206             : SIMD_INLINE v128 v128_unziphi_32(v128 a, v128 b) {
     207             :   return _mm_castps_si128(_mm_shuffle_ps(
     208             :       _mm_castsi128_ps(b), _mm_castsi128_ps(a), _MM_SHUFFLE(3, 1, 3, 1)));
     209             : }
     210             : 
     211             : SIMD_INLINE v128 v128_unziplo_32(v128 a, v128 b) {
     212             :   return _mm_castps_si128(_mm_shuffle_ps(
     213             :       _mm_castsi128_ps(b), _mm_castsi128_ps(a), _MM_SHUFFLE(2, 0, 2, 0)));
     214             : }
     215             : 
     216             : SIMD_INLINE v128 v128_unpack_u8_s16(v64 a) {
     217           0 :   return _mm_unpacklo_epi8(a, _mm_setzero_si128());
     218             : }
     219             : 
     220             : SIMD_INLINE v128 v128_unpacklo_u8_s16(v128 a) {
     221             :   return _mm_unpacklo_epi8(a, _mm_setzero_si128());
     222             : }
     223             : 
     224             : SIMD_INLINE v128 v128_unpackhi_u8_s16(v128 a) {
     225             :   return _mm_unpackhi_epi8(a, _mm_setzero_si128());
     226             : }
     227             : 
     228             : SIMD_INLINE v128 v128_unpack_s8_s16(v64 a) {
     229             :   return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8);
     230             : }
     231             : 
     232             : SIMD_INLINE v128 v128_unpacklo_s8_s16(v128 a) {
     233             :   return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8);
     234             : }
     235             : 
     236             : SIMD_INLINE v128 v128_unpackhi_s8_s16(v128 a) {
     237             :   return _mm_srai_epi16(_mm_unpackhi_epi8(a, a), 8);
     238             : }
     239             : 
     240             : SIMD_INLINE v128 v128_pack_s32_s16(v128 a, v128 b) {
     241             :   return _mm_packs_epi32(b, a);
     242             : }
     243             : 
     244             : SIMD_INLINE v128 v128_pack_s16_u8(v128 a, v128 b) {
     245           0 :   return _mm_packus_epi16(b, a);
     246             : }
     247             : 
     248             : SIMD_INLINE v128 v128_pack_s16_s8(v128 a, v128 b) {
     249           0 :   return _mm_packs_epi16(b, a);
     250             : }
     251             : 
     252             : SIMD_INLINE v128 v128_unpack_u16_s32(v64 a) {
     253             :   return _mm_unpacklo_epi16(a, _mm_setzero_si128());
     254             : }
     255             : 
     256             : SIMD_INLINE v128 v128_unpack_s16_s32(v64 a) {
     257             :   return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16);
     258             : }
     259             : 
     260             : SIMD_INLINE v128 v128_unpacklo_u16_s32(v128 a) {
     261             :   return _mm_unpacklo_epi16(a, _mm_setzero_si128());
     262             : }
     263             : 
     264             : SIMD_INLINE v128 v128_unpacklo_s16_s32(v128 a) {
     265             :   return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16);
     266             : }
     267             : 
     268             : SIMD_INLINE v128 v128_unpackhi_u16_s32(v128 a) {
     269             :   return _mm_unpackhi_epi16(a, _mm_setzero_si128());
     270             : }
     271             : 
     272             : SIMD_INLINE v128 v128_unpackhi_s16_s32(v128 a) {
     273             :   return _mm_srai_epi32(_mm_unpackhi_epi16(a, a), 16);
     274             : }
     275             : 
     276             : SIMD_INLINE v128 v128_shuffle_8(v128 x, v128 pattern) {
     277             : #if defined(__SSSE3__)
     278           0 :   return _mm_shuffle_epi8(x, pattern);
     279             : #else
     280             :   v128 output;
     281           0 :   unsigned char *input = (unsigned char *)&x;
     282           0 :   unsigned char *index = (unsigned char *)&pattern;
     283           0 :   char *selected = (char *)&output;
     284             :   int counter;
     285             : 
     286           0 :   for (counter = 0; counter < 16; counter++) {
     287           0 :     selected[counter] = input[index[counter] & 15];
     288             :   }
     289             : 
     290           0 :   return output;
     291             : #endif
     292             : }
     293             : 
     294             : SIMD_INLINE int64_t v128_dotp_s16(v128 a, v128 b) {
     295             :   v128 r = _mm_madd_epi16(a, b);
     296             : #if defined(__SSE4_1__) && defined(__x86_64__)
     297             :   v128 c = _mm_add_epi64(_mm_cvtepi32_epi64(r),
     298             :                          _mm_cvtepi32_epi64(_mm_srli_si128(r, 8)));
     299             :   return _mm_cvtsi128_si64(_mm_add_epi64(c, _mm_srli_si128(c, 8)));
     300             : #else
     301             :   return (int64_t)_mm_cvtsi128_si32(r) +
     302             :          (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 4)) +
     303             :          (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 8)) +
     304             :          (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 12));
     305             : #endif
     306             : }
     307             : 
     308             : SIMD_INLINE uint64_t v128_hadd_u8(v128 a) {
     309             :   v128 t = _mm_sad_epu8(a, _mm_setzero_si128());
     310             :   return v64_low_u32(v128_low_v64(t)) + v64_low_u32(v128_high_v64(t));
     311             : }
     312             : 
     313             : typedef v128 sad128_internal;
     314             : 
     315             : SIMD_INLINE sad128_internal v128_sad_u8_init() { return _mm_setzero_si128(); }
     316             : 
     317             : /* Implementation dependent return value.  Result must be finalised with
     318             :    v128_sad_u8_sum().
     319             :    The result for more than 32 v128_sad_u8() calls is undefined. */
     320             : SIMD_INLINE sad128_internal v128_sad_u8(sad128_internal s, v128 a, v128 b) {
     321             :   return _mm_add_epi64(s, _mm_sad_epu8(a, b));
     322             : }
     323             : 
     324             : SIMD_INLINE uint32_t v128_sad_u8_sum(sad128_internal s) {
     325             :   return v128_low_u32(_mm_add_epi32(s, _mm_unpackhi_epi64(s, s)));
     326             : }
     327             : 
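A minimal usage sketch of the SAD accumulator, assuming a 16-pixel-wide block
with caller-supplied pointers and strides (sad_16xh_example, src, ref,
src_stride, ref_stride and h are hypothetical, not part of this file):

    SIMD_INLINE uint32_t sad_16xh_example(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          int h) {
      sad128_internal s = v128_sad_u8_init();
      int i;
      for (i = 0; i < h; i++) { /* keep h <= 32, per the comment above */
        s = v128_sad_u8(s, v128_load_unaligned(src + i * src_stride),
                        v128_load_unaligned(ref + i * ref_stride));
      }
      return v128_sad_u8_sum(s); /* finalise to get the 32-bit SAD */
    }
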
     328             : typedef v128 ssd128_internal;
     329             : 
     330             : SIMD_INLINE ssd128_internal v128_ssd_u8_init() { return _mm_setzero_si128(); }
     331             : 
     332             : /* Implementation dependent return value.  Result must be finalised with
     333             :  * v128_ssd_u8_sum(). */
     334             : SIMD_INLINE ssd128_internal v128_ssd_u8(ssd128_internal s, v128 a, v128 b) {
     335             :   v128 l = _mm_sub_epi16(_mm_unpacklo_epi8(a, _mm_setzero_si128()),
     336             :                          _mm_unpacklo_epi8(b, _mm_setzero_si128()));
     337             :   v128 h = _mm_sub_epi16(_mm_unpackhi_epi8(a, _mm_setzero_si128()),
     338             :                          _mm_unpackhi_epi8(b, _mm_setzero_si128()));
     339             :   v128 rl = _mm_madd_epi16(l, l);
     340             :   v128 rh = _mm_madd_epi16(h, h);
     341             :   v128 c = _mm_cvtsi32_si128(32);
     342             :   rl = _mm_add_epi32(rl, _mm_srli_si128(rl, 8));
     343             :   rl = _mm_add_epi32(rl, _mm_srli_si128(rl, 4));
     344             :   rh = _mm_add_epi32(rh, _mm_srli_si128(rh, 8));
     345             :   rh = _mm_add_epi32(rh, _mm_srli_si128(rh, 4));
     346             :   return _mm_add_epi64(
     347             :       s, _mm_srl_epi64(_mm_sll_epi64(_mm_unpacklo_epi64(rl, rh), c), c));
     348             : }
     349             : 
     350             : SIMD_INLINE uint32_t v128_ssd_u8_sum(ssd128_internal s) {
     351             :   return v128_low_u32(_mm_add_epi32(s, _mm_unpackhi_epi64(s, s)));
     352             : }
     353             : 
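The SSD accumulator follows the same init / accumulate / finalise pattern; a
one-row sketch (ssd_16x1_example is a hypothetical name):

    SIMD_INLINE uint32_t ssd_16x1_example(const uint8_t *a, const uint8_t *b) {
      ssd128_internal s = v128_ssd_u8_init();
      s = v128_ssd_u8(s, v128_load_unaligned(a), v128_load_unaligned(b));
      return v128_ssd_u8_sum(s); /* sum of squared differences of 16 bytes */
    }
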
     354             : SIMD_INLINE v128 v128_or(v128 a, v128 b) { return _mm_or_si128(a, b); }
     355             : 
     356           0 : SIMD_INLINE v128 v128_xor(v128 a, v128 b) { return _mm_xor_si128(a, b); }
     357             : 
     358             : SIMD_INLINE v128 v128_and(v128 a, v128 b) { return _mm_and_si128(a, b); }
     359             : 
     360             : SIMD_INLINE v128 v128_andn(v128 a, v128 b) { return _mm_andnot_si128(b, a); }
     361             : 
     362             : SIMD_INLINE v128 v128_mul_s16(v64 a, v64 b) {
     363             :   v64 lo_bits = v64_mullo_s16(a, b);
     364             :   v64 hi_bits = v64_mulhi_s16(a, b);
     365             :   return v128_from_v64(v64_ziphi_16(hi_bits, lo_bits),
     366             :                        v64_ziplo_16(hi_bits, lo_bits));
     367             : }
     368             : 
     369             : SIMD_INLINE v128 v128_mullo_s16(v128 a, v128 b) {
     370             :   return _mm_mullo_epi16(a, b);
     371             : }
     372             : 
     373             : SIMD_INLINE v128 v128_mulhi_s16(v128 a, v128 b) {
     374             :   return _mm_mulhi_epi16(a, b);
     375             : }
     376             : 
     377             : SIMD_INLINE v128 v128_mullo_s32(v128 a, v128 b) {
     378             : #if defined(__SSE4_1__)
     379           0 :   return _mm_mullo_epi32(a, b);
     380             : #else
     381           0 :   return _mm_unpacklo_epi32(
     382           0 :       _mm_shuffle_epi32(_mm_mul_epu32(a, b), 8),
     383           0 :       _mm_shuffle_epi32(
     384             :           _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)), 8));
     385             : #endif
     386             : }
     387             : 
     388           0 : SIMD_INLINE v128 v128_madd_s16(v128 a, v128 b) { return _mm_madd_epi16(a, b); }
     389             : 
     390             : SIMD_INLINE v128 v128_madd_us8(v128 a, v128 b) {
     391             : #if defined(__SSSE3__)
     392             :   return _mm_maddubs_epi16(a, b);
     393             : #else
     394             :   return _mm_packs_epi32(
     395             :       _mm_madd_epi16(_mm_unpacklo_epi8(a, _mm_setzero_si128()),
     396             :                      _mm_srai_epi16(_mm_unpacklo_epi8(b, b), 8)),
     397             :       _mm_madd_epi16(_mm_unpackhi_epi8(a, _mm_setzero_si128()),
     398             :                      _mm_srai_epi16(_mm_unpackhi_epi8(b, b), 8)));
     399             : #endif
     400             : }
     401             : 
     402             : SIMD_INLINE v128 v128_avg_u8(v128 a, v128 b) { return _mm_avg_epu8(a, b); }
     403             : 
     404             : SIMD_INLINE v128 v128_rdavg_u8(v128 a, v128 b) {
     405             :   return _mm_sub_epi8(_mm_avg_epu8(a, b),
     406             :                       _mm_and_si128(_mm_xor_si128(a, b), v128_dup_8(1)));
     407             : }
     408             : 
     409             : SIMD_INLINE v128 v128_avg_u16(v128 a, v128 b) { return _mm_avg_epu16(a, b); }
     410             : 
     411           0 : SIMD_INLINE v128 v128_min_u8(v128 a, v128 b) { return _mm_min_epu8(a, b); }
     412             : 
     413             : SIMD_INLINE v128 v128_max_u8(v128 a, v128 b) { return _mm_max_epu8(a, b); }
     414             : 
     415             : SIMD_INLINE v128 v128_min_s8(v128 a, v128 b) {
     416             : #if defined(__SSE4_1__)
     417             :   return _mm_min_epi8(a, b);
     418             : #else
     419             :   v128 mask = _mm_cmplt_epi8(a, b);
     420             :   return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
     421             : #endif
     422             : }
     423             : 
     424             : SIMD_INLINE v128 v128_max_s8(v128 a, v128 b) {
     425             : #if defined(__SSE4_1__)
     426             :   return _mm_max_epi8(a, b);
     427             : #else
     428             :   v128 mask = _mm_cmplt_epi8(b, a);
     429             :   return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
     430             : #endif
     431             : }
     432             : 
     433           0 : SIMD_INLINE v128 v128_min_s16(v128 a, v128 b) { return _mm_min_epi16(a, b); }
     434             : 
     435             : SIMD_INLINE v128 v128_max_s16(v128 a, v128 b) { return _mm_max_epi16(a, b); }
     436             : 
     437             : SIMD_INLINE v128 v128_cmpgt_s8(v128 a, v128 b) { return _mm_cmpgt_epi8(a, b); }
     438             : 
     439           0 : SIMD_INLINE v128 v128_cmplt_s8(v128 a, v128 b) { return _mm_cmplt_epi8(a, b); }
     440             : 
     441             : SIMD_INLINE v128 v128_cmpeq_8(v128 a, v128 b) { return _mm_cmpeq_epi8(a, b); }
     442             : 
     443             : SIMD_INLINE v128 v128_cmpgt_s16(v128 a, v128 b) {
     444             :   return _mm_cmpgt_epi16(a, b);
     445             : }
     446             : 
     447             : SIMD_INLINE v128 v128_cmplt_s16(v128 a, v128 b) {
     448           0 :   return _mm_cmplt_epi16(a, b);
     449             : }
     450             : 
     451             : SIMD_INLINE v128 v128_cmpeq_16(v128 a, v128 b) { return _mm_cmpeq_epi16(a, b); }
     452             : 
     453             : SIMD_INLINE v128 v128_shl_8(v128 a, unsigned int c) {
     454             :   return _mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << c)),
     455             :                        _mm_sll_epi16(a, _mm_cvtsi32_si128(c)));
     456             : }
     457             : 
     458             : SIMD_INLINE v128 v128_shr_u8(v128 a, unsigned int c) {
     459           0 :   return _mm_and_si128(_mm_set1_epi8(0xff >> c),
     460             :                        _mm_srl_epi16(a, _mm_cvtsi32_si128(c)));
     461             : }
     462             : 
     463             : SIMD_INLINE v128 v128_shr_s8(v128 a, unsigned int c) {
     464           0 :   __m128i x = _mm_cvtsi32_si128(c + 8);
     465           0 :   return _mm_packs_epi16(_mm_sra_epi16(_mm_unpacklo_epi8(a, a), x),
     466             :                          _mm_sra_epi16(_mm_unpackhi_epi8(a, a), x));
     467             : }
     468             : 
     469             : SIMD_INLINE v128 v128_shl_16(v128 a, unsigned int c) {
     470             :   return _mm_sll_epi16(a, _mm_cvtsi32_si128(c));
     471             : }
     472             : 
     473             : SIMD_INLINE v128 v128_shr_u16(v128 a, unsigned int c) {
     474           0 :   return _mm_srl_epi16(a, _mm_cvtsi32_si128(c));
     475             : }
     476             : 
     477             : SIMD_INLINE v128 v128_shr_s16(v128 a, unsigned int c) {
     478           0 :   return _mm_sra_epi16(a, _mm_cvtsi32_si128(c));
     479             : }
     480             : 
     481             : SIMD_INLINE v128 v128_shl_32(v128 a, unsigned int c) {
     482             :   return _mm_sll_epi32(a, _mm_cvtsi32_si128(c));
     483             : }
     484             : 
     485             : SIMD_INLINE v128 v128_shr_u32(v128 a, unsigned int c) {
     486             :   return _mm_srl_epi32(a, _mm_cvtsi32_si128(c));
     487             : }
     488             : 
     489             : SIMD_INLINE v128 v128_shr_s32(v128 a, unsigned int c) {
     490             :   return _mm_sra_epi32(a, _mm_cvtsi32_si128(c));
     491             : }
     492             : 
     493             : /* These intrinsics require immediate values, so we must use #defines
     494             :    to enforce that. */
     495             : #define v128_shl_n_byte(a, c) _mm_slli_si128(a, c)
     496             : #define v128_shr_n_byte(a, c) _mm_srli_si128(a, c)
     497             : #define v128_shl_n_8(a, c) \
     498             :   _mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << (c))), _mm_slli_epi16(a, c))
     499             : #define v128_shr_n_u8(a, c) \
     500             :   _mm_and_si128(_mm_set1_epi8(0xff >> (c)), _mm_srli_epi16(a, c))
     501             : #define v128_shr_n_s8(a, c)                                         \
     502             :   _mm_packs_epi16(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), (c) + 8), \
     503             :                   _mm_srai_epi16(_mm_unpackhi_epi8(a, a), (c) + 8))
     504             : #define v128_shl_n_16(a, c) _mm_slli_epi16(a, c)
     505             : #define v128_shr_n_u16(a, c) _mm_srli_epi16(a, c)
     506             : #define v128_shr_n_s16(a, c) _mm_srai_epi16(a, c)
     507             : #define v128_shl_n_32(a, c) _mm_slli_epi32(a, c)
     508             : #define v128_shr_n_u32(a, c) _mm_srli_epi32(a, c)
     509             : #define v128_shr_n_s32(a, c) _mm_srai_epi32(a, c)
     510             : 
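A short sketch of the immediate-count forms above (round_shift_u16_by_6_example
is a hypothetical name): the count must be a literal so that the underlying
_mm_srli_epi16 receives an immediate; for run-time counts use v128_shr_u16()
and the other non-_n_ variants defined earlier.

    SIMD_INLINE v128 round_shift_u16_by_6_example(v128 a) {
      /* Rounding shift right by 6 of each unsigned 16-bit lane:
         add half of 2^6, then shift by the literal 6. */
      return v128_shr_n_u16(v128_add_16(a, v128_dup_16(1 << 5)), 6);
    }
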
     511             : #endif /* _V128_INTRINSICS_H */

Generated by: LCOV version 1.13