LCOV - code coverage report
Current view: top level - gfx/skia/skia/src/opts - SkChecksum_opts.h (source / functions) Hit Total Coverage
Test: output.info Lines: 34 62 54.8 %
Date: 2017-07-14 16:53:18 Functions: 3 7 42.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright 2016 Google Inc.
       3             :  *
       4             :  * Use of this source code is governed by a BSD-style license that can be
       5             :  * found in the LICENSE file.
       6             :  */
       7             : 
       8             : #ifndef SkChecksum_opts_DEFINED
       9             : #define SkChecksum_opts_DEFINED
      10             : 
      11             : #include "SkChecksum.h"
      12             : #include "SkTypes.h"
      13             : 
      14             : #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
      15             :     #include <immintrin.h>
      16             : #elif defined(SK_CPU_ARM64) && defined(SK_ARM_HAS_CRC32)
      17             :     #include <arm_acle.h>
      18             : #endif
      19             : 
      20             : namespace SK_OPTS_NS {
      21             : 
      22             : template <typename T>
      23         575 : static inline T unaligned_load(const uint8_t* src) {
      24             :     T val;
      25         575 :     memcpy(&val, src, sizeof(val));
      26         575 :     return val;
      27             : }
      28             : 
      29             : #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 && (defined(__x86_64__) || defined(_M_X64))
    // This is not a CRC32.  It's Just A Hash that uses those instructions because they're fast.
    //
    // Hashes `bytes` bytes starting at `vdata`, seeded with `seed`, and returns a
    // 32-bit hash.  Chunks are combined with XOR across three lanes, so the result
    // is NOT a standard CRC over the buffer — do not persist or compare it against
    // CRC32 values produced elsewhere.
    static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t seed) {
        auto data = (const uint8_t*)vdata;

        // _mm_crc32_u64() operates on 64-bit registers, so we use uint64_t for a while.
        uint64_t hash = seed;
        if (bytes >= 24) {
            // We'll create 3 independent hashes, each using _mm_crc32_u64()
            // to hash 8 bytes per step.  Both 3 and independent are important:
            // we can execute 3 of these instructions in parallel on a single core.
            uint64_t a = hash,
                     b = hash,
                     c = hash;
            size_t steps = bytes/24;
            while (steps --> 0) {
                a = _mm_crc32_u64(a, unaligned_load<uint64_t>(data+ 0));
                b = _mm_crc32_u64(b, unaligned_load<uint64_t>(data+ 8));
                c = _mm_crc32_u64(c, unaligned_load<uint64_t>(data+16));
                data += 24;
            }
            bytes %= 24;       // 0..23 bytes remain for the tail below.
            hash = a^b^c;      // Fold the three lanes together (XOR, order-insensitive).
        }

        SkASSERT(bytes < 24);
        if (bytes >= 16) {
            // Consume 8 of the remaining 16..23 bytes.  That leaves 8..15, whose
            // bit 3 is set, so the `bytes & 8` test below consumes the next 8 —
            // together these two branches cover every count in [16, 24).
            hash = _mm_crc32_u64(hash, unaligned_load<uint64_t>(data));
            bytes -= 8;
            data  += 8;
        }

        SkASSERT(bytes < 16);
        if (bytes & 8) {
            hash = _mm_crc32_u64(hash, unaligned_load<uint64_t>(data));
            data  += 8;
        }

        // The remainder of these _mm_crc32_u*() operate on a 32-bit register.
        // We don't lose anything here: only the bottom 32-bits were populated.
        auto hash32 = (uint32_t)hash;

        // Mop up the final 0..7 bytes, 4/2/1 at a time, via the bits of `bytes`.
        if (bytes & 4) {
            hash32 = _mm_crc32_u32(hash32, unaligned_load<uint32_t>(data));
            data += 4;
        }
        if (bytes & 2) {
            hash32 = _mm_crc32_u16(hash32, unaligned_load<uint16_t>(data));
            data += 2;
        }
        if (bytes & 1) {
            hash32 = _mm_crc32_u8(hash32, unaligned_load<uint8_t>(data));
        }
        return hash32;
    }
      84             : 
      85             : #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
    // 32-bit version of above, using _mm_crc32_u32() but not _mm_crc32_u64().
    // (The 64-bit CRC intrinsic is unavailable when compiling for 32-bit x86.)
    // Same scheme: three independent lanes — 4 bytes per step here — folded with
    // XOR, then a tail.  Like the 64-bit path, this is Just A Hash, not a CRC32.
    static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
        auto data = (const uint8_t*)vdata;

        if (bytes >= 12) {
            // We'll create 3 independent hashes, each using _mm_crc32_u32()
            // to hash 4 bytes per step.  Both 3 and independent are important:
            // we can execute 3 of these instructions in parallel on a single core.
            uint32_t a = hash,
                     b = hash,
                     c = hash;
            size_t steps = bytes/12;
            while (steps --> 0) {
                a = _mm_crc32_u32(a, unaligned_load<uint32_t>(data+0));
                b = _mm_crc32_u32(b, unaligned_load<uint32_t>(data+4));
                c = _mm_crc32_u32(c, unaligned_load<uint32_t>(data+8));
                data += 12;
            }
            bytes %= 12;       // 0..11 bytes remain for the tail below.
            hash = a^b^c;      // Fold the three lanes together.
        }

        SkASSERT(bytes < 12);
        if (bytes >= 8) {
            // Consume 4 of the remaining 8..11 bytes.  That leaves 4..7, whose
            // bit 2 is set, so `bytes & 4` below takes the next 4 — together
            // these two branches cover every count in [8, 12).
            hash = _mm_crc32_u32(hash, unaligned_load<uint32_t>(data));
            bytes -= 4;
            data  += 4;
        }

        SkASSERT(bytes < 8);
        // Mop up the final 0..7 bytes, 4/2/1 at a time, via the bits of `bytes`.
        if (bytes & 4) {
            hash = _mm_crc32_u32(hash, unaligned_load<uint32_t>(data));
            data += 4;
        }
        if (bytes & 2) {
            hash = _mm_crc32_u16(hash, unaligned_load<uint16_t>(data));
            data += 2;
        }
        if (bytes & 1) {
            hash = _mm_crc32_u8(hash, unaligned_load<uint8_t>(data));
        }
        return hash;
    }
     129             : 
     130             : #elif defined(SK_CPU_ARM64) && defined(SK_ARM_HAS_CRC32)
    // ARMv8 hardware-CRC32 flavor, using the ACLE __crc32d/w/h/b intrinsics.
    // Same structure as the x86-64 path: three independent 8-byte lanes folded
    // with XOR, then a 4/2/1-byte tail.  Just A Hash — not a standard CRC32.
    static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
        auto data = (const uint8_t*)vdata;
        if (bytes >= 24) {
            // Three independent lanes so the core can execute the three CRC
            // instructions in parallel.
            uint32_t a = hash,
                     b = hash,
                     c = hash;
            size_t steps = bytes/24;
            while (steps --> 0) {
                a = __crc32d(a, unaligned_load<uint64_t>(data+ 0));
                b = __crc32d(b, unaligned_load<uint64_t>(data+ 8));
                c = __crc32d(c, unaligned_load<uint64_t>(data+16));
                data += 24;
            }
            bytes %= 24;       // 0..23 bytes remain for the tail below.
            hash = a^b^c;      // Fold the three lanes together.
        }

        SkASSERT(bytes < 24);
        if (bytes >= 16) {
            // Consume 8 of the remaining 16..23 bytes; the `bytes & 8` test
            // below picks up the next 8, covering every count in [16, 24).
            hash = __crc32d(hash, unaligned_load<uint64_t>(data));
            bytes -= 8;
            data  += 8;
        }

        SkASSERT(bytes < 16);
        // Mop up the final 0..15 bytes, 8/4/2/1 at a time, via the bits of `bytes`.
        if (bytes & 8) {
            hash = __crc32d(hash, unaligned_load<uint64_t>(data));
            data += 8;
        }
        if (bytes & 4) {
            hash = __crc32w(hash, unaligned_load<uint32_t>(data));
            data += 4;
        }
        if (bytes & 2) {
            hash = __crc32h(hash, unaligned_load<uint16_t>(data));
            data += 2;
        }
        if (bytes & 1) {
            hash = __crc32b(hash, unaligned_load<uint8_t>(data));
        }
        return hash;
    }
     173             : 
     174             : #else
     175             :     // This is Murmur3.
     176           0 :     static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
     177           0 :         auto data = (const uint8_t*)vdata;
     178             : 
     179           0 :         size_t original_bytes = bytes;
     180             : 
     181             :         // Handle 4 bytes at a time while possible.
     182           0 :         while (bytes >= 4) {
     183           0 :             uint32_t k = unaligned_load<uint32_t>(data);
     184           0 :             k *= 0xcc9e2d51;
     185           0 :             k = (k << 15) | (k >> 17);
     186           0 :             k *= 0x1b873593;
     187             : 
     188           0 :             hash ^= k;
     189           0 :             hash = (hash << 13) | (hash >> 19);
     190           0 :             hash *= 5;
     191           0 :             hash += 0xe6546b64;
     192             : 
     193           0 :             bytes -= 4;
     194           0 :             data  += 4;
     195             :         }
     196             : 
     197             :         // Handle last 0-3 bytes.
     198           0 :         uint32_t k = 0;
     199           0 :         switch (bytes & 3) {
     200           0 :             case 3: k ^= data[2] << 16;
     201           0 :             case 2: k ^= data[1] <<  8;
     202           0 :             case 1: k ^= data[0] <<  0;
     203           0 :                     k *= 0xcc9e2d51;
     204           0 :                     k = (k << 15) | (k >> 17);
     205           0 :                     k *= 0x1b873593;
     206           0 :                     hash ^= k;
     207             :         }
     208             : 
     209           0 :         hash ^= original_bytes;
     210           0 :         return SkChecksum::Mix(hash);
     211             :     }
     212             : #endif
     213             : 
     214             : }  // namespace SK_OPTS_NS
     215             : 
     216             : #endif//SkChecksum_opts_DEFINED

Generated by: LCOV version 1.13