Line data Source code
1 : /*
2 : * Copyright 2016 Google Inc.
3 : *
4 : * Use of this source code is governed by a BSD-style license that can be
5 : * found in the LICENSE file.
6 : */
7 :
8 : #ifndef SkChecksum_opts_DEFINED
9 : #define SkChecksum_opts_DEFINED
10 :
11 : #include "SkChecksum.h"
12 : #include "SkTypes.h"
13 :
14 : #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
15 : #include <immintrin.h>
16 : #elif defined(SK_CPU_ARM64) && defined(SK_ARM_HAS_CRC32)
17 : #include <arm_acle.h>
18 : #endif
19 :
20 : namespace SK_OPTS_NS {
21 :
// Read a T from src without requiring src to be aligned for T.
// memcpy is the portable, UB-free way to express an unaligned load;
// compilers lower it to a single plain load on x86/ARM.
template <typename T>
static inline T unaligned_load(const uint8_t* src) {
    T value;
    memcpy(&value, src, sizeof(value));
    return value;
}
28 :
29 : #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 && (defined(__x86_64__) || defined(_M_X64))
30 : // This is not a CRC32. It's Just A Hash that uses those instructions because they're fast.
31 64 : static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t seed) {
32 64 : auto data = (const uint8_t*)vdata;
33 :
34 : // _mm_crc32_u64() operates on 64-bit registers, so we use uint64_t for a while.
35 64 : uint64_t hash = seed;
36 64 : if (bytes >= 24) {
37 : // We'll create 3 independent hashes, each using _mm_crc32_u64()
38 : // to hash 8 bytes per step. Both 3 and independent are important:
39 : // we can execute 3 of these instructions in parallel on a single core.
40 64 : uint64_t a = hash,
41 64 : b = hash,
42 64 : c = hash;
43 64 : size_t steps = bytes/24;
44 320 : while (steps --> 0) {
45 256 : a = _mm_crc32_u64(a, unaligned_load<uint64_t>(data+ 0));
46 256 : b = _mm_crc32_u64(b, unaligned_load<uint64_t>(data+ 8));
47 256 : c = _mm_crc32_u64(c, unaligned_load<uint64_t>(data+16));
48 128 : data += 24;
49 : }
50 64 : bytes %= 24;
51 64 : hash = a^b^c;
52 : }
53 :
54 64 : SkASSERT(bytes < 24);
55 64 : if (bytes >= 16) {
56 128 : hash = _mm_crc32_u64(hash, unaligned_load<uint64_t>(data));
57 64 : bytes -= 8;
58 64 : data += 8;
59 : }
60 :
61 64 : SkASSERT(bytes < 16);
62 64 : if (bytes & 8) {
63 128 : hash = _mm_crc32_u64(hash, unaligned_load<uint64_t>(data));
64 64 : data += 8;
65 : }
66 :
67 : // The remainder of these _mm_crc32_u*() operate on a 32-bit register.
68 : // We don't lose anything here: only the bottom 32-bits were populated.
69 64 : auto hash32 = (uint32_t)hash;
70 :
71 64 : if (bytes & 4) {
72 126 : hash32 = _mm_crc32_u32(hash32, unaligned_load<uint32_t>(data));
73 63 : data += 4;
74 : }
75 64 : if (bytes & 2) {
76 0 : hash32 = _mm_crc32_u16(hash32, unaligned_load<uint16_t>(data));
77 0 : data += 2;
78 : }
79 64 : if (bytes & 1) {
80 0 : hash32 = _mm_crc32_u8(hash32, unaligned_load<uint8_t>(data));
81 : }
82 64 : return hash32;
83 : }
84 :
85 : #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
86 : // 32-bit version of above, using _mm_crc32_u32() but not _mm_crc32_u64().
87 : static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
88 : auto data = (const uint8_t*)vdata;
89 :
90 : if (bytes >= 12) {
91 : // We'll create 3 independent hashes, each using _mm_crc32_u32()
92 : // to hash 4 bytes per step. Both 3 and independent are important:
93 : // we can execute 3 of these instructions in parallel on a single core.
94 : uint32_t a = hash,
95 : b = hash,
96 : c = hash;
97 : size_t steps = bytes/12;
98 : while (steps --> 0) {
99 : a = _mm_crc32_u32(a, unaligned_load<uint32_t>(data+0));
100 : b = _mm_crc32_u32(b, unaligned_load<uint32_t>(data+4));
101 : c = _mm_crc32_u32(c, unaligned_load<uint32_t>(data+8));
102 : data += 12;
103 : }
104 : bytes %= 12;
105 : hash = a^b^c;
106 : }
107 :
108 : SkASSERT(bytes < 12);
109 : if (bytes >= 8) {
110 : hash = _mm_crc32_u32(hash, unaligned_load<uint32_t>(data));
111 : bytes -= 4;
112 : data += 4;
113 : }
114 :
115 : SkASSERT(bytes < 8);
116 : if (bytes & 4) {
117 : hash = _mm_crc32_u32(hash, unaligned_load<uint32_t>(data));
118 : data += 4;
119 : }
120 : if (bytes & 2) {
121 : hash = _mm_crc32_u16(hash, unaligned_load<uint16_t>(data));
122 : data += 2;
123 : }
124 : if (bytes & 1) {
125 : hash = _mm_crc32_u8(hash, unaligned_load<uint8_t>(data));
126 : }
127 : return hash;
128 : }
129 :
130 : #elif defined(SK_CPU_ARM64) && defined(SK_ARM_HAS_CRC32)
131 : static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
132 : auto data = (const uint8_t*)vdata;
133 : if (bytes >= 24) {
134 : uint32_t a = hash,
135 : b = hash,
136 : c = hash;
137 : size_t steps = bytes/24;
138 : while (steps --> 0) {
139 : a = __crc32d(a, unaligned_load<uint64_t>(data+ 0));
140 : b = __crc32d(b, unaligned_load<uint64_t>(data+ 8));
141 : c = __crc32d(c, unaligned_load<uint64_t>(data+16));
142 : data += 24;
143 : }
144 : bytes %= 24;
145 : hash = a^b^c;
146 : }
147 :
148 : SkASSERT(bytes < 24);
149 : if (bytes >= 16) {
150 : hash = __crc32d(hash, unaligned_load<uint64_t>(data));
151 : bytes -= 8;
152 : data += 8;
153 : }
154 :
155 : SkASSERT(bytes < 16);
156 : if (bytes & 8) {
157 : hash = __crc32d(hash, unaligned_load<uint64_t>(data));
158 : data += 8;
159 : }
160 : if (bytes & 4) {
161 : hash = __crc32w(hash, unaligned_load<uint32_t>(data));
162 : data += 4;
163 : }
164 : if (bytes & 2) {
165 : hash = __crc32h(hash, unaligned_load<uint16_t>(data));
166 : data += 2;
167 : }
168 : if (bytes & 1) {
169 : hash = __crc32b(hash, unaligned_load<uint8_t>(data));
170 : }
171 : return hash;
172 : }
173 :
174 : #else
175 : // This is Murmur3.
176 0 : static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
177 0 : auto data = (const uint8_t*)vdata;
178 :
179 0 : size_t original_bytes = bytes;
180 :
181 : // Handle 4 bytes at a time while possible.
182 0 : while (bytes >= 4) {
183 0 : uint32_t k = unaligned_load<uint32_t>(data);
184 0 : k *= 0xcc9e2d51;
185 0 : k = (k << 15) | (k >> 17);
186 0 : k *= 0x1b873593;
187 :
188 0 : hash ^= k;
189 0 : hash = (hash << 13) | (hash >> 19);
190 0 : hash *= 5;
191 0 : hash += 0xe6546b64;
192 :
193 0 : bytes -= 4;
194 0 : data += 4;
195 : }
196 :
197 : // Handle last 0-3 bytes.
198 0 : uint32_t k = 0;
199 0 : switch (bytes & 3) {
200 0 : case 3: k ^= data[2] << 16;
201 0 : case 2: k ^= data[1] << 8;
202 0 : case 1: k ^= data[0] << 0;
203 0 : k *= 0xcc9e2d51;
204 0 : k = (k << 15) | (k >> 17);
205 0 : k *= 0x1b873593;
206 0 : hash ^= k;
207 : }
208 :
209 0 : hash ^= original_bytes;
210 0 : return SkChecksum::Mix(hash);
211 : }
212 : #endif
213 :
214 : } // namespace SK_OPTS_NS
215 :
216 : #endif//SkChecksum_opts_DEFINED
|