Line data Source code
1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #ifndef AOM_DSP_X86_SYNONYMS_H_
13 : #define AOM_DSP_X86_SYNONYMS_H_
14 :
#include <immintrin.h>
#include <string.h>

#include "./aom_config.h"
#include "aom/aom_integer.h"
19 :
20 : /**
21 : * Various reusable shorthands for x86 SIMD intrinsics.
22 : *
23 : * Intrinsics prefixed with xx_ operate on or return 128bit XMM registers.
24 : * Intrinsics prefixed with yy_ operate on or return 256bit YMM registers.
25 : */
26 :
27 : // Loads and stores to do away with the tedium of casting the address
28 : // to the right type.
// Loads 32 bits from 'a' into the lowest lane of an XMM register; the upper
// 96 bits of the result are zero. The bytes are copied with memcpy instead of
// being read through a uint32_t pointer, so 'a' may have any alignment and
// may point at an object of any type without invoking undefined behavior.
static INLINE __m128i xx_loadl_32(const void *a) {
  int val;
  memcpy(&val, a, sizeof(val));
  return _mm_cvtsi32_si128(val);
}
32 :
// Loads 64 bits from 'a' into the low half of an XMM register via
// _mm_loadl_epi64, sparing the caller the pointer cast.
static INLINE __m128i xx_loadl_64(const void *a) {
  const __m128i *const src = (const __m128i *)a;
  return _mm_loadl_epi64(src);
}
36 :
// Loads 128 bits from 'a' with _mm_load_si128 (the aligned-load intrinsic),
// sparing the caller the pointer cast.
static INLINE __m128i xx_load_128(const void *a) {
  const __m128i *const src = (const __m128i *)a;
  return _mm_load_si128(src);
}
40 :
// Loads 128 bits from 'a' with _mm_loadu_si128 (the unaligned-load
// intrinsic), sparing the caller the pointer cast.
static INLINE __m128i xx_loadu_128(const void *a) {
  const __m128i *const src = (const __m128i *)a;
  return _mm_loadu_si128(src);
}
44 :
// Stores the lowest 32 bits of 'v' to 'a'. The bytes are written with memcpy
// instead of through a uint32_t pointer, so 'a' may have any alignment and
// may point at an object of any type without invoking undefined behavior.
static INLINE void xx_storel_32(void *const a, const __m128i v) {
  const int val = _mm_cvtsi128_si32(v);
  memcpy(a, &val, sizeof(val));
}
48 :
// Stores the low 64 bits of 'v' to 'a' via _mm_storel_epi64, sparing the
// caller the pointer cast.
static INLINE void xx_storel_64(void *const a, const __m128i v) {
  __m128i *const dst = (__m128i *)a;
  _mm_storel_epi64(dst, v);
}
52 :
// Stores all 128 bits of 'v' to 'a' with _mm_store_si128 (the aligned-store
// intrinsic), sparing the caller the pointer cast.
static INLINE void xx_store_128(void *const a, const __m128i v) {
  __m128i *const dst = (__m128i *)a;
  _mm_store_si128(dst, v);
}
56 :
// Stores all 128 bits of 'v' to 'a' with _mm_storeu_si128 (the
// unaligned-store intrinsic), sparing the caller the pointer cast.
static INLINE void xx_storeu_128(void *const a, const __m128i v) {
  __m128i *const dst = (__m128i *)a;
  _mm_storeu_si128(dst, v);
}
60 :
// Halves each unsigned 16-bit lane of 'v_val_w' with rounding, i.e. computes
// (x + 1) >> 1 per lane. Averaging against zero with _mm_avg_epu16 yields
// this without the intermediate sum overflowing 16 bits.
static INLINE __m128i xx_round_epu16(__m128i v_val_w) {
  const __m128i v_zero_w = _mm_setzero_si128();
  return _mm_avg_epu16(v_val_w, v_zero_w);
}
64 :
// Divides each unsigned 16-bit lane of 'v_val_w' by 2^bits, rounding to
// nearest: (x + (1 << (bits - 1))) >> bits.
//
// The value is first shifted right by bits - 1 and then averaged against
// zero, which performs the final rounded halving without overflowing 16 bits.
// bits == 0 is handled explicitly: the shift count would otherwise be -1,
// which zeroes every lane instead of leaving the input unchanged.
static INLINE __m128i xx_roundn_epu16(__m128i v_val_w, int bits) {
  if (bits == 0) {
    return v_val_w;
  } else {
    const __m128i v_s_w = _mm_srli_epi16(v_val_w, bits - 1);
    return _mm_avg_epu16(v_s_w, _mm_setzero_si128());
  }
}
69 :
// Divides each unsigned 32-bit lane of 'v_val_d' by 2^bits, rounding to
// nearest: (x + bias) >> bits with bias = 2^(bits - 1), or 0 when bits == 0.
// 'bits' must be in [0, 31]. The addition wraps modulo 2^32, so lanes within
// 'bias' of UINT32_MAX are not rounded correctly.
static INLINE __m128i xx_roundn_epu32(__m128i v_val_d, int bits) {
  // Compute the bias in unsigned arithmetic: 1 << 31 overflows a signed int
  // and in practice produces a sign-extended (wrong) bias after the >> 1.
  const int bias = (int)((1u << bits) >> 1);
  const __m128i v_bias_d = _mm_set1_epi32(bias);
  const __m128i v_tmp_d = _mm_add_epi32(v_val_d, v_bias_d);
  return _mm_srli_epi32(v_tmp_d, bits);
}
75 :
// This is equivalent to ROUND_POWER_OF_TWO(v_val_d, bits) applied to each
// signed 32-bit lane: (x + bias) >> bits with an arithmetic shift, where
// bias = 2^(bits - 1), or 0 when bits == 0. 'bits' must be in [0, 31].
static INLINE __m128i xx_roundn_epi32_unsigned(__m128i v_val_d, int bits) {
  // Compute the bias in unsigned arithmetic: 1 << 31 overflows a signed int
  // and in practice produces a negative (wrong) bias after the >> 1.
  const int bias = (int)((1u << bits) >> 1);
  const __m128i v_bias_d = _mm_set1_epi32(bias);
  const __m128i v_tmp_d = _mm_add_epi32(v_val_d, v_bias_d);
  return _mm_srai_epi32(v_tmp_d, bits);
}
82 :
// This is equivalent to ROUND_POWER_OF_TWO_SIGNED(v_val_d, bits) applied to
// each signed 32-bit lane: divide by 2^bits, rounding to nearest with ties
// away from zero. 'bits' must be in [0, 31].
//
// Negative lanes get an extra -1 (their sign mask) added before the
// arithmetic shift, which turns the floor of the shift into the symmetric
// rounding that the macro specifies.
static INLINE __m128i xx_roundn_epi32(__m128i v_val_d, int bits) {
  if (bits == 0) {
    // Nothing to round. Without this guard the sign correction below would
    // turn every negative lane x into x - 1.
    return v_val_d;
  } else {
    // Compute the bias in unsigned arithmetic: 1 << 31 overflows a signed
    // int and in practice produces a negative (wrong) bias after the >> 1.
    const int bias = (int)((1u << bits) >> 1);
    const __m128i v_bias_d = _mm_set1_epi32(bias);
    const __m128i v_sign_d = _mm_srai_epi32(v_val_d, 31);
    const __m128i v_tmp_d =
        _mm_add_epi32(_mm_add_epi32(v_val_d, v_bias_d), v_sign_d);
    return _mm_srai_epi32(v_tmp_d, bits);
  }
}
91 :
92 : #endif // AOM_DSP_X86_SYNONYMS_H_
|