LCOV - code coverage report
Current view: top level - media/libyuv/libyuv/source - cpu_id.cc (source / functions) Hit Total Coverage
Test: output.info Lines: 50 105 47.6 %
Date: 2017-07-14 16:53:18 Functions: 4 7 57.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
       3             :  *
       4             :  *  Use of this source code is governed by a BSD-style license
       5             :  *  that can be found in the LICENSE file in the root of the source
       6             :  *  tree. An additional intellectual property rights grant can be found
       7             :  *  in the file PATENTS. All contributing project authors may
       8             :  *  be found in the AUTHORS file in the root of the source tree.
       9             :  */
      10             : 
      11             : #include "libyuv/cpu_id.h"
      12             : 
      13             : #if defined(_MSC_VER)
      14             : #include <intrin.h>  // For __cpuidex()
      15             : #endif
      16             : #if !defined(__pnacl__) && !defined(__CLR_VER) &&                           \
      17             :     !defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
      18             :     defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
      19             : #include <immintrin.h>  // For _xgetbv()
      20             : #endif
      21             : 
      22             : #if !defined(__native_client__)
      23             : #include <stdlib.h>  // For getenv()
      24             : #endif
      25             : 
      26             : // For ArmCpuCaps(), which is unit tested on all platforms.
      27             : #include <stdio.h>
      28             : #include <string.h>
      29             : 
      30             : #include "libyuv/basic_types.h"  // For CPU_X86
      31             : 
      32             : #ifdef __cplusplus
      33             : namespace libyuv {
      34             : extern "C" {
      35             : #endif
      36             : 
      37             : // For functions that use the stack and have runtime checks for overflow,
      38             : // use SAFEBUFFERS to avoid the additional check. Expands to nothing elsewhere.
      39             : #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) && \
      40             :     !defined(__clang__)
      41             : #define SAFEBUFFERS __declspec(safebuffers)
      42             : #else
      43             : #define SAFEBUFFERS
      44             : #endif
      45             : 
      46             : // Low level cpuid for X86. Runs cpuid with eax = info_eax and ecx = info_ecx and stores eax, ebx, ecx, edx in cpu_info[0..3].
      47             : #if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
      48             :      defined(__x86_64__)) &&                                     \
      49             :     !defined(__pnacl__) && !defined(__CLR_VER)
      50             : LIBYUV_API
      51           6 : void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
      52             : #if defined(_MSC_VER)
      53             : // Visual C version uses intrinsic or inline x86 assembly.
      54             : #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
      55             :   __cpuidex((int*)(cpu_info), info_eax, info_ecx);
      56             : #elif defined(_M_IX86)
      57             :   __asm {
      58             :     mov        eax, info_eax
      59             :     mov        ecx, info_ecx
      60             :     mov        edi, cpu_info
      61             :     cpuid
      62             :     mov        [edi], eax
      63             :     mov        [edi + 4], ebx
      64             :     mov        [edi + 8], ecx
      65             :     mov        [edi + 12], edx
      66             :   }
      67             : #else  // Older Visual C, not 32 bit x86: __cpuid() takes no ecx argument.
      68             :   if (info_ecx == 0) {
      69             :     __cpuid((int*)(cpu_info), info_eax);
      70             :   } else {
      71             :     cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0u;  // Non-zero info_ecx cannot be queried here; report zeros.
      72             :   }
      73             : #endif
      74             : // GCC version uses inline x86 assembly.
      75             : #else  // defined(_MSC_VER)
      76             :   uint32 info_ebx, info_edx;
      77             :   asm volatile(
      78             : #if defined(__i386__) && defined(__PIC__)
      79             :       // Preserve ebx for fpic 32 bit: ebx is saved in edi, then swapped back so edi carries the cpuid ebx result.
      80             :       "mov %%ebx, %%edi                          \n"
      81             :       "cpuid                                     \n"
      82             :       "xchg %%edi, %%ebx                         \n"
      83             :       : "=D"(info_ebx),
      84             : #else
      85             :       "cpuid                                     \n"
      86             :       : "=b"(info_ebx),
      87             : #endif  //  defined( __i386__) && defined(__PIC__)
      88           6 :         "+a"(info_eax), "+c"(info_ecx), "=d"(info_edx));
      89           6 :   cpu_info[0] = info_eax;
      90           6 :   cpu_info[1] = info_ebx;
      91           6 :   cpu_info[2] = info_ecx;
      92           6 :   cpu_info[3] = info_edx;
      93             : #endif  // defined(_MSC_VER)
      94           6 : }
      95             : #else  // (defined(_M_IX86) || defined(_M_X64) ...
      96             : LIBYUV_API
      97             : void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
      98             :   (void)eax;
      99             :   (void)ecx;
     100             :   cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;  // Stub for other targets: always reports zeros.
     101             : }
     102             : #endif
     103             : 
     104             : // For VS2010 and earlier emit can be used:
     105             : //   _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0  // For VS2010 and earlier.
     106             : //  __asm {
     107             : //    xor        ecx, ecx    // xcr 0
     108             : //    xgetbv
     109             : //    mov        xcr0, eax
     110             : //  }
     111             : // For VS2013 and earlier 32 bit, the optimizer produces bad code for _xgetbv(0), so "g" optimization is turned off around GetXCR0().
     112             : // https://code.google.com/p/libyuv/issues/detail?id=529
     113             : #if defined(_M_IX86) && (_MSC_VER < 1900)
     114             : #pragma optimize("g", off)
     115             : #endif
     116             : #if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
     117             :      defined(__x86_64__)) &&                                     \
     118             :     !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
     119             : // X86 CPUs have xgetbv to detect whether the OS saves the high parts of ymm registers. Returns the low 32 bits of XCR0.
     120           4 : int GetXCR0() {
     121           4 :   uint32 xcr0 = 0u;  // Stays 0 if neither branch below is compiled in.
     122             : #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
     123             :   xcr0 = (uint32)(_xgetbv(0));  // VS2010 SP1 required.
     124             : #elif defined(__i386__) || defined(__x86_64__)
     125           4 :   asm(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr0) : "c"(0) : "%edx");  // Same bytes as the _emit form above (xgetbv), with ecx = 0.
     126             : #endif  // defined(__i386__) || defined(__x86_64__)
     127           4 :   return xcr0;
     128             : }
     129             : #else
     130             : // xgetbv unavailable to query for OSSave support.  Return 0.
     131             : #define GetXCR0() 0
     132             : #endif  // defined(_M_IX86) || defined(_M_X64) ..
     133             : // Return optimization to previous setting.
     134             : #if defined(_M_IX86) && (_MSC_VER < 1900)
     135             : #pragma optimize("g", on)
     136             : #endif
     137             : 
     138             : // Based on libvpx arm_cpudetect.c. Scans the text file cpuinfo_name and returns kCpuHasNEON or 0.
     139             : // For Arm, but public to allow testing on any CPU.
     140           0 : LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
     141             :   char cpuinfo_line[512];
     142           0 :   FILE* f = fopen(cpuinfo_name, "r");
     143           0 :   if (!f) {
     144             :     // Assume Neon if /proc/cpuinfo is unavailable.
     145             :     // This will occur for Chrome sandbox for Pepper or Render process.
     146           0 :     return kCpuHasNEON;
     147             :   }
     148           0 :   while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
     149           0 :     if (memcmp(cpuinfo_line, "Features", 8) == 0) {  // Only lines starting with "Features" are examined. NOTE(review): compares 8 bytes even when the line read is shorter.
     150           0 :       char* p = strstr(cpuinfo_line, " neon");
     151           0 :       if (p && (p[5] == ' ' || p[5] == '\n')) {  // Match must be followed by a space or newline.
     152           0 :         fclose(f);
     153           0 :         return kCpuHasNEON;
     154             :       }
     155             :       // aarch64 uses asimd for Neon.
     156           0 :       p = strstr(cpuinfo_line, " asimd");
     157           0 :       if (p && (p[6] == ' ' || p[6] == '\n')) {  // Same delimiter check for " asimd".
     158           0 :         fclose(f);
     159           0 :         return kCpuHasNEON;
     160             :       }
     161             :     }
     162             :   }
     163           0 :   fclose(f);
     164           0 :   return 0;  // Neither " neon" nor " asimd" was found.
     165             : }
     166             : 
     167           0 : LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name,  // Returns kCpuHasMSA / kCpuHasDSPR2 if the file lists ase, else 0.
     168             :                                        const char ase[]) {  // ase: " msa" or " dspr2" (leading space included); other values yield 0.
     169             :   char cpuinfo_line[512];
     170           0 :   int len = (int)strlen(ase);  // Used to inspect the character right after a match.
     171           0 :   FILE* f = fopen(cpuinfo_name, "r");
     172           0 :   if (!f) {
     173             :     // If the file cannot be opened, assume the requested ase is present.
     174           0 :     if (strcmp(ase, " msa") == 0) {
     175           0 :       return kCpuHasMSA;
     176             :     }
     177           0 :     if (strcmp(ase, " dspr2") == 0) {
     178           0 :       return kCpuHasDSPR2;
     179             :     }
     180           0 :     return 0;
     181             :   }
     182           0 :   while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
     183           0 :     if (memcmp(cpuinfo_line, "ASEs implemented", 16) == 0) {  // Only lines starting with "ASEs implemented" are examined.
     184           0 :       char* p = strstr(cpuinfo_line, ase);
     185           0 :       if (p && (p[len] == ' ' || p[len] == '\n')) {  // Match must be followed by a space or newline.
     186           0 :         fclose(f);
     187           0 :         if (strcmp(ase, " msa") == 0) {
     188           0 :           return kCpuHasMSA;
     189             :         }
     190           0 :         if (strcmp(ase, " dspr2") == 0) {
     191           0 :           return kCpuHasDSPR2;
     192             :         }
     193           0 :         return 0;  // ase matched but is neither " msa" nor " dspr2".
     194             :       }
     195             :     }
     196             :   }
     197           0 :   fclose(f);
     198           0 :   return 0;  // ase was not listed.
     199             : }
     200             : 
     201             : // Detected CPU feature flags for SIMD instruction sets; written by InitCpuFlags() and MaskCpuFlags().
     202             : LIBYUV_API
     203             : int cpu_info_ = 0;  // 0 until InitCpuFlags() stores the detected flags with kCpuInitialized set.
     204             : 
     205             : // Test environment variable for disabling CPU features. Returns LIBYUV_TRUE when the variable is set and its
     206             : // first character is not '0'. A value starting with '0' is ignored to make it easy to set the variable on/off.
     207             : #if !defined(__native_client__) && !defined(_M_ARM)
     208             : 
     209          24 : static LIBYUV_BOOL TestEnv(const char* name) {
     210          24 :   const char* var = getenv(name);
     211          24 :   if (var) {
     212           0 :     if (var[0] != '0') {  // Only the first character is inspected.
     213           0 :       return LIBYUV_TRUE;
     214             :     }
     215             :   }
     216          24 :   return LIBYUV_FALSE;
     217             : }
     218             : #else  // nacl does not support getenv(); this variant (also used for _M_ARM) never disables anything.
     219             : static LIBYUV_BOOL TestEnv(const char*) {
     220             :   return LIBYUV_FALSE;
     221             : }
     222             : #endif
     223             : 
     224           2 : LIBYUV_API SAFEBUFFERS int InitCpuFlags(void) {  // Detects CPU features, stores them in cpu_info_ and returns them.
     225           2 :   int cpu_info = 0;
     226             : #if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
     227           2 :   uint32 cpu_info0[4] = {0, 0, 0, 0};  // CpuId() results are laid out as [0]=eax [1]=ebx [2]=ecx [3]=edx.
     228           2 :   uint32 cpu_info1[4] = {0, 0, 0, 0};
     229           2 :   uint32 cpu_info7[4] = {0, 0, 0, 0};
     230           2 :   CpuId(0, 0, cpu_info0);
     231           2 :   CpuId(1, 0, cpu_info1);
     232           2 :   if (cpu_info0[0] >= 7) {  // Query 7 only when the eax result of query 0 is at least 7; otherwise cpu_info7 stays zero.
     233           2 :     CpuId(7, 0, cpu_info7);
     234             :   }
     235           6 :   cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
     236           4 :              ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
     237           4 :              ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
     238           2 :              ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
     239           2 :              ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0);
     240             : 
     241             :   // AVX requires that the OS saves the YMM registers.
     242           4 :   if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) &&  // AVX and OSXSave
     243           2 :       ((GetXCR0() & 6) == 6)) {  // Test OS saves YMM registers
     244           6 :     cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
     245           4 :                 ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
     246           2 :                 ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0);
     247             : 
     248             :     // Detect AVX512bw
     249           2 :     if ((GetXCR0() & 0xe0) == 0xe0) {  // Requires XCR0 bits 5..7 (0xe0) to all be set.
     250           0 :       cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0;
     251             :     }
     252             :   }
     253             : 
     254             :   // Environment variable overrides for testing. Each one clears a single flag.
     255           2 :   if (TestEnv("LIBYUV_DISABLE_X86")) {
     256           0 :     cpu_info &= ~kCpuHasX86;
     257             :   }
     258           2 :   if (TestEnv("LIBYUV_DISABLE_SSE2")) {
     259           0 :     cpu_info &= ~kCpuHasSSE2;
     260             :   }
     261           2 :   if (TestEnv("LIBYUV_DISABLE_SSSE3")) {
     262           0 :     cpu_info &= ~kCpuHasSSSE3;
     263             :   }
     264           2 :   if (TestEnv("LIBYUV_DISABLE_SSE41")) {
     265           0 :     cpu_info &= ~kCpuHasSSE41;
     266             :   }
     267           2 :   if (TestEnv("LIBYUV_DISABLE_SSE42")) {
     268           0 :     cpu_info &= ~kCpuHasSSE42;
     269             :   }
     270           2 :   if (TestEnv("LIBYUV_DISABLE_AVX")) {
     271           0 :     cpu_info &= ~kCpuHasAVX;
     272             :   }
     273           2 :   if (TestEnv("LIBYUV_DISABLE_AVX2")) {
     274           0 :     cpu_info &= ~kCpuHasAVX2;
     275             :   }
     276           2 :   if (TestEnv("LIBYUV_DISABLE_ERMS")) {
     277           0 :     cpu_info &= ~kCpuHasERMS;
     278             :   }
     279           2 :   if (TestEnv("LIBYUV_DISABLE_FMA3")) {
     280           0 :     cpu_info &= ~kCpuHasFMA3;
     281             :   }
     282           2 :   if (TestEnv("LIBYUV_DISABLE_AVX3")) {
     283           0 :     cpu_info &= ~kCpuHasAVX3;
     284             :   }
     285           2 :   if (TestEnv("LIBYUV_DISABLE_F16C")) {
     286           0 :     cpu_info &= ~kCpuHasF16C;
     287             :   }
     288             : 
     289             : #endif
     290             : #if defined(__mips__) && defined(__linux__)
     291             : #if defined(__mips_dspr2)
     292             :   cpu_info |= kCpuHasDSPR2;
     293             : #endif
     294             : #if defined(__mips_msa)
     295             :   cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa");  // NOTE(review): plain '=' discards kCpuHasDSPR2 set just above; confirm whether '|=' was intended.
     296             : #endif
     297             :   cpu_info |= kCpuHasMIPS;
     298             :   if (getenv("LIBYUV_DISABLE_DSPR2")) {  // NOTE(review): raw getenv(), so any value (even "0") disables, unlike TestEnv().
     299             :     cpu_info &= ~kCpuHasDSPR2;
     300             :   }
     301             :   if (getenv("LIBYUV_DISABLE_MSA")) {  // NOTE(review): same raw getenv() behavior as above.
     302             :     cpu_info &= ~kCpuHasMSA;
     303             :   }
     304             : #endif
     305             : #if defined(__arm__) || defined(__aarch64__)
     306             : // gcc -mfpu=neon defines __ARM_NEON__
     307             : // __ARM_NEON__ generates code that requires Neon.  NaCL also requires Neon.
     308             : // For Linux, /proc/cpuinfo can be tested but without that assume Neon.
     309             : #if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
     310             :   cpu_info = kCpuHasNEON;
     311             : // For aarch64 (arm64), the feature list in /proc/cpuinfo is not complete, e.g. there is
     312             : // no neon flag in it.
     313             : // So for aarch64, neon enabling is hard coded below.
     314             : #endif
     315             : #if defined(__aarch64__)
     316             :   cpu_info = kCpuHasNEON;
     317             : #else
     318             :   // Linux arm parse text file for neon detect.
     319             :   cpu_info = ArmCpuCaps("/proc/cpuinfo");  // NOTE(review): overwrites the kCpuHasNEON assigned above; ArmCpuCaps() also returns kCpuHasNEON when the file cannot be opened.
     320             : #endif
     321             :   cpu_info |= kCpuHasARM;
     322             :   if (TestEnv("LIBYUV_DISABLE_NEON")) {
     323             :     cpu_info &= ~kCpuHasNEON;
     324             :   }
     325             : #endif  // __arm__
     326           2 :   if (TestEnv("LIBYUV_DISABLE_ASM")) {  // Clears every flag detected above.
     327           0 :     cpu_info = 0;
     328             :   }
     329           2 :   cpu_info |= kCpuInitialized;  // Set even when all feature flags were cleared.
     330           2 :   cpu_info_ = cpu_info;  // Store the result in the global.
     331           2 :   return cpu_info;
     332             : }
     333             : 
     334             : // Re-runs InitCpuFlags() and stores only the bits also set in enable_flags into cpu_info_. Note that use of this function is not thread safe.
     335             : LIBYUV_API
     336           0 : void MaskCpuFlags(int enable_flags) {
     337           0 :   cpu_info_ = InitCpuFlags() & enable_flags;  // kCpuInitialized is masked too if enable_flags does not include it.
     338           0 : }
     339             : 
     340             : #ifdef __cplusplus
     341             : }  // extern "C"
     342             : }  // namespace libyuv
     343             : #endif

Generated by: LCOV version 1.13