Line data Source code
1 : /*
2 : * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "libyuv/cpu_id.h"
12 :
13 : #if defined(_MSC_VER)
14 : #include <intrin.h> // For __cpuidex()
15 : #endif
16 : #if !defined(__pnacl__) && !defined(__CLR_VER) && \
17 : !defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
18 : defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
19 : #include <immintrin.h> // For _xgetbv()
20 : #endif
21 :
22 : #if !defined(__native_client__)
23 : #include <stdlib.h> // For getenv()
24 : #endif
25 :
26 : // For ArmCpuCaps() but unittested on all platforms
27 : #include <stdio.h>
28 : #include <string.h>
29 :
30 : #include "libyuv/basic_types.h" // For CPU_X86
31 :
32 : #ifdef __cplusplus
33 : namespace libyuv {
34 : extern "C" {
35 : #endif
36 :
37 : // For functions that use the stack and have runtime checks for overflow,
38 : // use SAFEBUFFERS on the function definition to avoid the additional check.
// SAFEBUFFERS expands to __declspec(safebuffers) when _MSC_FULL_VER is at
// least 160040219 and the compiler is not clang; otherwise it expands to
// nothing, so it can be used unconditionally.
39 : #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) && \
40 : !defined(__clang__)
41 : #define SAFEBUFFERS __declspec(safebuffers)
42 : #else // Other compilers: the annotation is a no-op.
43 : #define SAFEBUFFERS
44 : #endif
45 :
46 : // Low level cpuid for X86.
// CpuId(info_eax, info_ecx, cpu_info) runs CPUID with EAX = info_eax and
// ECX = info_ecx and stores the resulting EAX, EBX, ECX and EDX values in
// cpu_info[0], cpu_info[1], cpu_info[2] and cpu_info[3] respectively.
// cpu_info must point to at least 4 writable uint32 values.
// On targets that do not match the condition below, a stub with the same
// signature is compiled instead and reports all zeros.
47 : #if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
48 : defined(__x86_64__)) && \
49 : !defined(__pnacl__) && !defined(__CLR_VER)
50 : LIBYUV_API
51 6 : void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
52 : #if defined(_MSC_VER)
53 : // Visual C version uses intrinsic or inline x86 assembly.
54 : #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
// Newer Visual C: the intrinsic takes both the leaf and the sub-leaf.
55 : __cpuidex((int*)(cpu_info), info_eax, info_ecx);
56 : #elif defined(_M_IX86) // Older 32 bit Visual C: inline assembly.
57 : __asm {
58 : mov eax, info_eax
59 : mov ecx, info_ecx
60 : mov edi, cpu_info
61 : cpuid
62 : mov [edi], eax
63 : mov [edi + 4], ebx
64 : mov [edi + 8], ecx
65 : mov [edi + 12], edx
66 : }
67 : #else // Visual C but not x86
// __cpuid() has no sub-leaf argument, so only info_ecx == 0 is queried; any
// other sub-leaf is reported as all zeros.
68 : if (info_ecx == 0) {
69 : __cpuid((int*)(cpu_info), info_eax);
70 : } else {
71 : cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0u;
72 : }
73 : #endif
74 : // GCC version uses inline x86 assembly.
75 : #else // defined(_MSC_VER)
// eax and ecx are read-write operands ("+a", "+c"); ebx and edx are outputs.
76 : uint32 info_ebx, info_edx;
77 : asm volatile(
78 : #if defined(__i386__) && defined(__PIC__)
79 : // Preserve ebx for fpic 32 bit: save it in edi, then swap so that edi holds the cpuid result and ebx is restored.
80 : "mov %%ebx, %%edi \n"
81 : "cpuid \n"
82 : "xchg %%edi, %%ebx \n"
83 : : "=D"(info_ebx),
84 : #else
85 : "cpuid \n"
86 : : "=b"(info_ebx),
87 : #endif // defined( __i386__) && defined(__PIC__)
88 6 : "+a"(info_eax), "+c"(info_ecx), "=d"(info_edx));
89 6 : cpu_info[0] = info_eax;
90 6 : cpu_info[1] = info_ebx;
91 6 : cpu_info[2] = info_ecx;
92 6 : cpu_info[3] = info_edx;
93 : #endif // defined(_MSC_VER)
94 6 : }
95 : #else // (defined(_M_IX86) || defined(_M_X64) ...
// Stub for targets without the x86 cpuid path: ignores the requested leaf and
// sub-leaf and fills cpu_info[0..3] with zeros.
96 : LIBYUV_API
97 : void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
98 : (void)eax; // Unused: there is nothing to query.
99 : (void)ecx; // Unused: there is nothing to query.
100 : cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
101 : }
102 : #endif
103 :
104 : // For VS2010 and earlier, _emit can be used to encode xgetbv:
105 : // _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
106 : // __asm {
107 : // xor ecx, ecx // xcr 0
108 : // xgetbv
109 : // mov xcr0, eax
110 : // }
111 : // For VS2013 and earlier 32 bit, the optimizer produces bad code for _xgetbv(0),
// so optimization is switched off around GetXCR0() and restored afterwards.
112 : // https://code.google.com/p/libyuv/issues/detail?id=529
113 : #if defined(_M_IX86) && (_MSC_VER < 1900)
114 : #pragma optimize("g", off)
115 : #endif
116 : #if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
117 : defined(__x86_64__)) && \
118 : !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
119 : // X86 CPUs have xgetbv to detect whether the OS saves the high parts of ymm registers.
// GetXCR0() returns the value read with xgetbv for register index 0,
// truncated to 32 bits, or 0 when neither code path below is compiled in.
// InitCpuFlags() only calls it after the CPUID leaf 1 ECX mask 0x1c000000
// check has passed.
120 4 : int GetXCR0() {
121 4 : uint32 xcr0 = 0u;
122 : #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
123 : xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
124 : #elif defined(__i386__) || defined(__x86_64__)
// Raw encoding of xgetbv (0f 01 d0): ecx = 0 selects the register, the result
// is taken from eax and edx is declared as clobbered.
125 4 : asm(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr0) : "c"(0) : "%edx");
126 : #endif // defined(__i386__) || defined(__x86_64__)
127 4 : return xcr0;
128 : }
129 : #else
130 : // xgetbv unavailable to query for OSSave support. GetXCR0() evaluates to 0.
131 : #define GetXCR0() 0
132 : #endif // defined(_M_IX86) || defined(_M_X64) ..
133 : // Return optimization to previous setting.
134 : #if defined(_M_IX86) && (_MSC_VER < 1900)
135 : #pragma optimize("g", on)
136 : #endif
137 :
138 : // based on libvpx arm_cpudetect.c
139 : // For Arm, but public to allow testing on any CPU
140 0 : LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
141 : char cpuinfo_line[512];
142 0 : FILE* f = fopen(cpuinfo_name, "r");
143 0 : if (!f) {
144 : // Assume Neon if /proc/cpuinfo is unavailable.
145 : // This will occur for Chrome sandbox for Pepper or Render process.
146 0 : return kCpuHasNEON;
147 : }
148 0 : while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
149 0 : if (memcmp(cpuinfo_line, "Features", 8) == 0) {
150 0 : char* p = strstr(cpuinfo_line, " neon");
151 0 : if (p && (p[5] == ' ' || p[5] == '\n')) {
152 0 : fclose(f);
153 0 : return kCpuHasNEON;
154 : }
155 : // aarch64 uses asimd for Neon.
156 0 : p = strstr(cpuinfo_line, " asimd");
157 0 : if (p && (p[6] == ' ' || p[6] == '\n')) {
158 0 : fclose(f);
159 0 : return kCpuHasNEON;
160 : }
161 : }
162 : }
163 0 : fclose(f);
164 0 : return 0;
165 : }
166 :
167 0 : LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name,
168 : const char ase[]) {
169 : char cpuinfo_line[512];
170 0 : int len = (int)strlen(ase);
171 0 : FILE* f = fopen(cpuinfo_name, "r");
172 0 : if (!f) {
173 : // ase enabled if /proc/cpuinfo is unavailable.
174 0 : if (strcmp(ase, " msa") == 0) {
175 0 : return kCpuHasMSA;
176 : }
177 0 : if (strcmp(ase, " dspr2") == 0) {
178 0 : return kCpuHasDSPR2;
179 : }
180 0 : return 0;
181 : }
182 0 : while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
183 0 : if (memcmp(cpuinfo_line, "ASEs implemented", 16) == 0) {
184 0 : char* p = strstr(cpuinfo_line, ase);
185 0 : if (p && (p[len] == ' ' || p[len] == '\n')) {
186 0 : fclose(f);
187 0 : if (strcmp(ase, " msa") == 0) {
188 0 : return kCpuHasMSA;
189 : }
190 0 : if (strcmp(ase, " dspr2") == 0) {
191 0 : return kCpuHasDSPR2;
192 : }
193 0 : return 0;
194 : }
195 : }
196 : }
197 0 : fclose(f);
198 0 : return 0;
199 : }
200 :
201 : // CPU detect function for SIMD instruction sets.
// cpu_info_ holds the cached feature flags. It is written by InitCpuFlags()
// and MaskCpuFlags(); InitCpuFlags() always stores a value that includes
// kCpuInitialized.
202 : LIBYUV_API
203 : int cpu_info_ = 0; // 0 means cpu_info is not initialized yet.
204 :
205 : // Test environment variable for disabling CPU features. Any non-zero value
206 : // to disable. Zero ignored to make it easy to set the variable on/off.
207 : #if !defined(__native_client__) && !defined(_M_ARM)
208 :
209 24 : static LIBYUV_BOOL TestEnv(const char* name) {
210 24 : const char* var = getenv(name);
211 24 : if (var) {
212 0 : if (var[0] != '0') {
213 0 : return LIBYUV_TRUE;
214 : }
215 : }
216 24 : return LIBYUV_FALSE;
217 : }
218 : #else // nacl does not support getenv().
219 : static LIBYUV_BOOL TestEnv(const char*) {
220 : return LIBYUV_FALSE;
221 : }
222 : #endif
223 :
224 2 : LIBYUV_API SAFEBUFFERS int InitCpuFlags(void) {
225 2 : int cpu_info = 0;
226 : #if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
227 2 : uint32 cpu_info0[4] = {0, 0, 0, 0};
228 2 : uint32 cpu_info1[4] = {0, 0, 0, 0};
229 2 : uint32 cpu_info7[4] = {0, 0, 0, 0};
230 2 : CpuId(0, 0, cpu_info0);
231 2 : CpuId(1, 0, cpu_info1);
232 2 : if (cpu_info0[0] >= 7) {
233 2 : CpuId(7, 0, cpu_info7);
234 : }
235 6 : cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
236 4 : ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
237 4 : ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
238 2 : ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
239 2 : ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0);
240 :
241 : // AVX requires OS saves YMM registers.
242 4 : if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave
243 2 : ((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers
244 6 : cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
245 4 : ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
246 2 : ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0);
247 :
248 : // Detect AVX512bw
249 2 : if ((GetXCR0() & 0xe0) == 0xe0) {
250 0 : cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0;
251 : }
252 : }
253 :
254 : // Environment variable overrides for testing.
255 2 : if (TestEnv("LIBYUV_DISABLE_X86")) {
256 0 : cpu_info &= ~kCpuHasX86;
257 : }
258 2 : if (TestEnv("LIBYUV_DISABLE_SSE2")) {
259 0 : cpu_info &= ~kCpuHasSSE2;
260 : }
261 2 : if (TestEnv("LIBYUV_DISABLE_SSSE3")) {
262 0 : cpu_info &= ~kCpuHasSSSE3;
263 : }
264 2 : if (TestEnv("LIBYUV_DISABLE_SSE41")) {
265 0 : cpu_info &= ~kCpuHasSSE41;
266 : }
267 2 : if (TestEnv("LIBYUV_DISABLE_SSE42")) {
268 0 : cpu_info &= ~kCpuHasSSE42;
269 : }
270 2 : if (TestEnv("LIBYUV_DISABLE_AVX")) {
271 0 : cpu_info &= ~kCpuHasAVX;
272 : }
273 2 : if (TestEnv("LIBYUV_DISABLE_AVX2")) {
274 0 : cpu_info &= ~kCpuHasAVX2;
275 : }
276 2 : if (TestEnv("LIBYUV_DISABLE_ERMS")) {
277 0 : cpu_info &= ~kCpuHasERMS;
278 : }
279 2 : if (TestEnv("LIBYUV_DISABLE_FMA3")) {
280 0 : cpu_info &= ~kCpuHasFMA3;
281 : }
282 2 : if (TestEnv("LIBYUV_DISABLE_AVX3")) {
283 0 : cpu_info &= ~kCpuHasAVX3;
284 : }
285 2 : if (TestEnv("LIBYUV_DISABLE_F16C")) {
286 0 : cpu_info &= ~kCpuHasF16C;
287 : }
288 :
289 : #endif
290 : #if defined(__mips__) && defined(__linux__)
291 : #if defined(__mips_dspr2)
292 : cpu_info |= kCpuHasDSPR2;
293 : #endif
294 : #if defined(__mips_msa)
295 : cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa");
296 : #endif
297 : cpu_info |= kCpuHasMIPS;
298 : if (getenv("LIBYUV_DISABLE_DSPR2")) {
299 : cpu_info &= ~kCpuHasDSPR2;
300 : }
301 : if (getenv("LIBYUV_DISABLE_MSA")) {
302 : cpu_info &= ~kCpuHasMSA;
303 : }
304 : #endif
305 : #if defined(__arm__) || defined(__aarch64__)
306 : // gcc -mfpu=neon defines __ARM_NEON__
307 : // __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
308 : // For Linux, /proc/cpuinfo can be tested but without that assume Neon.
309 : #if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
310 : cpu_info = kCpuHasNEON;
311 : // For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
312 : // flag in it.
313 : // So for aarch64, neon enabling is hard coded here.
314 : #endif
315 : #if defined(__aarch64__)
316 : cpu_info = kCpuHasNEON;
317 : #else
318 : // Linux arm parse text file for neon detect.
319 : cpu_info = ArmCpuCaps("/proc/cpuinfo");
320 : #endif
321 : cpu_info |= kCpuHasARM;
322 : if (TestEnv("LIBYUV_DISABLE_NEON")) {
323 : cpu_info &= ~kCpuHasNEON;
324 : }
325 : #endif // __arm__
326 2 : if (TestEnv("LIBYUV_DISABLE_ASM")) {
327 0 : cpu_info = 0;
328 : }
329 2 : cpu_info |= kCpuInitialized;
330 2 : cpu_info_ = cpu_info;
331 2 : return cpu_info;
332 : }
333 :
334 : // Note that use of this function is not thread safe.
335 : LIBYUV_API
336 0 : void MaskCpuFlags(int enable_flags) {
337 0 : cpu_info_ = InitCpuFlags() & enable_flags;
338 0 : }
339 :
340 : #ifdef __cplusplus
341 : } // extern "C"
342 : } // namespace libyuv
343 : #endif
|