Line data Source code
1 : /* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 : /* compile-time and runtime tests for whether to use SSE instructions */
7 :
8 : #include "SSE.h"
9 :
10 : #ifdef HAVE_CPUID_H
11 : // cpuid.h is available on gcc 4.3 and higher on i386 and x86_64
12 : #include <cpuid.h>
13 : #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
14 : // MSVC 2005 or newer on x86-32 or x86-64
15 : #include <intrin.h>
16 : #endif
17 :
18 : namespace {
19 :
20 : // SSE.h has parallel #ifs which declare MOZILLA_SSE_HAVE_CPUID_DETECTION.
21 : // We can't declare these functions in the header file, however, because
22 : // <intrin.h> conflicts with <windows.h> on MSVC 2005, and some files want to
23 : // include both SSE.h and <windows.h>.
24 :
25 : #ifdef HAVE_CPUID_H
26 :
// Indices into the {EAX, EBX, ECX, EDX} array that has_cpuid_bits() fills in;
// callers pass one of these to select the register to test.
enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };

// Returns true when every bit of |bits| is set in register |reg| of CPUID
// leaf |level| (sub-leaf 0). Returns false when |level| is above the maximum
// leaf the CPU reports for that leaf range.
static bool
has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits)
{
    // Basic leaves (0x0000xxxx) and extended leaves (0x8000xxxx) each have
    // their own maximum. Ask for the maximum of the range |level| belongs to;
    // comparing an extended leaf against the basic maximum would reject every
    // extended leaf (e.g. 0x80000001, which carries the SSE4a bit).
    const unsigned int max_level = __get_cpuid_max(level & 0x80000000u, NULL);
    if (level > max_level)
        return false;

    // Read the four result registers straight into the array indexed by
    // CPUIDRegister, so no local has to shadow the enumerators.
    unsigned int regs[4];
    __cpuid_count(level, 0, regs[0], regs[1], regs[2], regs[3]);
    return (regs[reg] & bits) == bits;
}
44 :
45 : #if !defined(MOZILLA_PRESUME_AVX)
// Reads extended control register |xcr| and returns its 64-bit value.
static uint64_t xgetbv(uint32_t xcr)
{
    uint32_t low_half, high_half;
    // The bytes 0x0f 0x01 0xd0 are emitted verbatim: input in %ecx, result
    // halves come back in %eax (low) and %edx (high).
    __asm__ (".byte 0x0f, 0x01, 0xd0"
             : "=a"(low_half), "=d"(high_half)
             : "c"(xcr));
    const uint64_t upper = static_cast<uint64_t>(high_half) << 32;
    return upper | low_half;
}
51 : #endif
52 :
53 : #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
54 :
// Positions of the four CPUID result registers inside the int[4] buffer that
// has_cpuid_bits() indexes with a value of this type.
enum CPUIDRegister {
    eax = 0,
    ebx = 1,
    ecx = 2,
    edx = 3
};
56 :
57 : static bool
58 : has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits)
59 : {
60 : // Check that the level in question is supported.
61 : int regs[4];
62 : __cpuid(regs, level & 0x80000000u);
63 : if (unsigned(regs[0]) < level)
64 : return false;
65 :
66 : // "The __cpuid intrinsic clears the ECX register before calling the cpuid instruction."
67 : __cpuid(regs, level);
68 : return (unsigned(regs[reg]) & bits) == bits;
69 : }
70 :
71 : #if !defined(MOZILLA_PRESUME_AVX)
72 : static uint64_t xgetbv(uint32_t xcr) { return _xgetbv(xcr); }
73 : #endif
74 :
75 : #elif (defined(__GNUC__) || defined(__SUNPRO_CC)) && (defined(__i386) || defined(__x86_64__))
76 :
// Positions of the four CPUID result registers inside the int[4] buffer that
// moz_cpuid() fills and has_cpuid_bits() indexes.
enum CPUIDRegister {
    eax = 0,
    ebx = 1,
    ecx = 2,
    edx = 3
};
78 :
79 : #ifdef __i386
// Executes CPUID for leaf |InfoType| with sub-leaf 0 and stores the resulting
// EAX, EBX, ECX and EDX, in that order, into CPUInfo[0..3] (32-bit x86).
static void
moz_cpuid(int CPUInfo[4], int InfoType)
{
    // Register names are written with "%%" because this is an extended asm
    // statement (it has operands), where a single '%' introduces an operand
    // reference. "volatile" keeps the statement alive even though its only
    // output operand is never read afterwards.
    asm volatile (
        "xchg %%esi, %%ebx\n"  // park %ebx in %esi; cpuid overwrites %ebx
        "xor %%ecx, %%ecx\n"   // ecx is the sub-leaf (we only ever need 0)
        "cpuid\n"
        "movl %%eax, (%%edi)\n"
        "movl %%ebx, 4(%%edi)\n"
        "movl %%ecx, 8(%%edi)\n"
        "movl %%edx, 12(%%edi)\n"
        "xchg %%esi, %%ebx\n"  // put the original %ebx back
        : "+a"(InfoType)       // %eax: leaf on entry, overwritten by cpuid
        : "D"(CPUInfo)         // %edi
        // "memory": the template stores through CPUInfo behind the compiler's
        // back; "cc": xor modifies the flags.
        : "%ecx", "%edx", "%esi", "memory", "cc"
    );
}
98 : #else
// Executes CPUID for leaf |InfoType| with sub-leaf 0 and stores the resulting
// EAX, EBX, ECX and EDX, in that order, into CPUInfo[0..3] (x86-64).
static void
moz_cpuid(int CPUInfo[4], int InfoType)
{
    // Register names are written with "%%" because this is an extended asm
    // statement (it has operands), where a single '%' introduces an operand
    // reference. "volatile" keeps the statement alive even though its only
    // output operand is never read afterwards.
    asm volatile (
        "xchg %%rsi, %%rbx\n"  // park %rbx in %rsi; cpuid overwrites %rbx
        "xor %%ecx, %%ecx\n"   // ecx is the sub-leaf (we only ever need 0)
        "cpuid\n"
        "movl %%eax, (%%rdi)\n"
        "movl %%ebx, 4(%%rdi)\n"
        "movl %%ecx, 8(%%rdi)\n"
        "movl %%edx, 12(%%rdi)\n"
        "xchg %%rsi, %%rbx\n"  // put the original %rbx back
        : "+a"(InfoType)       // %eax: leaf on entry, overwritten by cpuid
        : "D"(CPUInfo)         // %rdi
        // "memory": the template stores through CPUInfo behind the compiler's
        // back; "cc": xor modifies the flags.
        : "%ecx", "%edx", "%rsi", "memory", "cc"
    );
}
117 : #endif
118 :
119 : static bool
120 : has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits)
121 : {
122 : // Check that the level in question is supported.
123 : volatile int regs[4];
124 : moz_cpuid((int *)regs, level & 0x80000000u);
125 : if (unsigned(regs[0]) < level)
126 : return false;
127 :
128 : moz_cpuid((int *)regs, level);
129 : return (unsigned(regs[reg]) & bits) == bits;
130 : }
131 :
132 : #endif // end CPUID declarations
133 :
134 : } // namespace
135 :
136 : namespace mozilla {
137 :
138 : namespace sse_private {
139 :
140 : #if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
141 :
142 : #if !defined(MOZILLA_PRESUME_MMX)
143 : bool mmx_enabled = has_cpuid_bits(1u, edx, (1u<<23));
144 : #endif
145 :
146 : #if !defined(MOZILLA_PRESUME_SSE)
147 : bool sse_enabled = has_cpuid_bits(1u, edx, (1u<<25));
148 : #endif
149 :
150 : #if !defined(MOZILLA_PRESUME_SSE2)
151 : bool sse2_enabled = has_cpuid_bits(1u, edx, (1u<<26));
152 : #endif
153 :
154 : #if !defined(MOZILLA_PRESUME_SSE3)
155 3 : bool sse3_enabled = has_cpuid_bits(1u, ecx, (1u<<0));
156 : #endif
157 :
158 : #if !defined(MOZILLA_PRESUME_SSSE3)
159 3 : bool ssse3_enabled = has_cpuid_bits(1u, ecx, (1u<<9));
160 : #endif
161 :
162 : #if !defined(MOZILLA_PRESUME_SSE4A)
163 3 : bool sse4a_enabled = has_cpuid_bits(0x80000001u, ecx, (1u<<6));
164 : #endif
165 :
166 : #if !defined(MOZILLA_PRESUME_SSE4_1)
167 3 : bool sse4_1_enabled = has_cpuid_bits(1u, ecx, (1u<<19));
168 : #endif
169 :
170 : #if !defined(MOZILLA_PRESUME_SSE4_2)
171 3 : bool sse4_2_enabled = has_cpuid_bits(1u, ecx, (1u<<20));
172 : #endif
173 :
174 : #if !defined(MOZILLA_PRESUME_AVX) || !defined(MOZILLA_PRESUME_AVX2)
175 6 : static bool has_avx()
176 : {
177 : #if defined(MOZILLA_PRESUME_AVX)
178 : return true;
179 : #else
180 6 : const unsigned AVX = 1u << 28;
181 6 : const unsigned OSXSAVE = 1u << 27;
182 6 : const unsigned XSAVE = 1u << 26;
183 :
184 6 : const unsigned XMM_STATE = 1u << 1;
185 6 : const unsigned YMM_STATE = 1u << 2;
186 6 : const unsigned AVX_STATE = XMM_STATE | YMM_STATE;
187 :
188 12 : return has_cpuid_bits(1u, ecx, AVX | OSXSAVE | XSAVE) &&
189 : // ensure the OS supports XSAVE of YMM registers
190 12 : (xgetbv(0) & AVX_STATE) == AVX_STATE;
191 : #endif // MOZILLA_PRESUME_AVX
192 : }
193 : #endif // !MOZILLA_PRESUME_AVX || !MOZILLA_PRESUME_AVX2
194 :
195 : #if !defined(MOZILLA_PRESUME_AVX)
196 3 : bool avx_enabled = has_avx();
197 : #endif
198 :
199 : #if !defined(MOZILLA_PRESUME_AVX2)
200 3 : bool avx2_enabled = has_avx() && has_cpuid_bits(7u, ebx, (1u<<5));
201 : #endif
202 :
203 : #if !defined(MOZILLA_PRESUME_AES)
204 3 : bool aes_enabled = has_cpuid_bits(1u, ecx, (1u<<25));
205 : #endif
206 :
207 : #endif
208 :
209 : } // namespace sse_private
210 9 : } // namespace mozilla
|