Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : /* Utilities for hashing. */
8 :
9 : /*
10 : * This file exports functions for hashing data down to a 32-bit value,
11 : * including:
12 : *
13 : * - HashString Hash a char* or char16_t/wchar_t* of known or unknown
14 : * length.
15 : *
16 : * - HashBytes Hash a byte array of known length.
17 : *
18 : * - HashGeneric Hash one or more values. Currently, we support uint32_t,
19 : * types which can be implicitly cast to uint32_t, data
20 : * pointers, and function pointers.
21 : *
22 : * - AddToHash Add one or more values to the given hash. This supports the
23 : * same list of types as HashGeneric.
24 : *
25 : *
26 : * You can chain these functions together to hash complex objects. For example:
27 : *
28 : * class ComplexObject
29 : * {
30 : * char* mStr;
31 : * uint32_t mUint1, mUint2;
32 : * void (*mCallbackFn)();
33 : *
34 : * public:
35 : * uint32_t hash()
36 : * {
37 : * uint32_t hash = HashString(mStr);
38 : * hash = AddToHash(hash, mUint1, mUint2);
39 : * return AddToHash(hash, mCallbackFn);
40 : * }
41 : * };
42 : *
43 : * If you want to hash an nsAString or nsACString, use the HashString functions
44 : * in nsHashKeys.h.
45 : */
46 :
47 : #ifndef mozilla_HashFunctions_h
48 : #define mozilla_HashFunctions_h
49 :
50 : #include "mozilla/Assertions.h"
51 : #include "mozilla/Attributes.h"
52 : #include "mozilla/Char16.h"
53 : #include "mozilla/MathAlgorithms.h"
54 : #include "mozilla/Types.h"
55 :
56 : #include <stdint.h>
57 :
58 : #ifdef __cplusplus
59 : namespace mozilla {
60 :
61 : /**
62 : * The golden ratio as a 32-bit fixed-point value.
63 : */
64 : static const uint32_t kGoldenRatioU32 = 0x9E3779B9U;
65 :
66 : inline uint32_t
67 23724819 : RotateBitsLeft32(uint32_t aValue, uint8_t aBits)
68 : {
69 23724819 : MOZ_ASSERT(aBits < 32);
70 23724819 : return (aValue << aBits) | (aValue >> (32 - aBits));
71 : }
72 :
73 : namespace detail {
74 :
75 : inline uint32_t
76 23724764 : AddU32ToHash(uint32_t aHash, uint32_t aValue)
77 : {
78 : /*
79 : * This is the meat of all our hash routines. This hash function is not
80 : * particularly sophisticated, but it seems to work well for our mostly
81 : * plain-text inputs. Implementation notes follow.
82 : *
83 : * Our use of the golden ratio here is arbitrary; we could pick almost any
84 : * number which:
85 : *
86 : * * is odd (because otherwise, all our hash values will be even)
87 : *
88 : * * has a reasonably-even mix of 1's and 0's (consider the extreme case
89 : * where we multiply by 0x3 or 0xeffffff -- this will not produce good
90 : * mixing across all bits of the hash).
91 : *
92 : * The rotation length of 5 is also arbitrary, although an odd number is again
93 : * preferable so our hash explores the whole universe of possible rotations.
94 : *
95 : * Finally, we multiply by the golden ratio *after* xor'ing, not before.
96 : * Otherwise, if |aHash| is 0 (as it often is for the beginning of a
97 : * message), the expression
98 : *
99 : * (kGoldenRatioU32 * RotateBitsLeft(aHash, 5)) |xor| aValue
100 : *
101 : * evaluates to |aValue|.
102 : *
103 : * (Number-theoretic aside: Because any odd number |m| is relatively prime to
104 : * our modulus (2^32), the list
105 : *
106 : * [x * m (mod 2^32) for 0 <= x < 2^32]
107 : *
108 : * has no duplicate elements. This means that multiplying by |m| does not
109 : * cause us to skip any possible hash values.
110 : *
111 : * It's also nice if |m| has large-ish order mod 2^32 -- that is, if the
112 : * smallest k such that m^k == 1 (mod 2^32) is large -- so we can safely
113 : * multiply our hash value by |m| a few times without negating the
114 : * multiplicative effect. Our golden ratio constant has order 2^29, which is
115 : * more than enough for our purposes.)
116 : */
117 23724764 : return kGoldenRatioU32 * (RotateBitsLeft32(aHash, 5) ^ aValue);
118 : }
119 :
120 : /**
121 : * AddUintptrToHash takes sizeof(uintptr_t) as a template parameter.
122 : */
123 : template<size_t PtrSize>
124 : inline uint32_t
125 : AddUintptrToHash(uint32_t aHash, uintptr_t aValue);
126 :
127 : template<>
128 : inline uint32_t
129 : AddUintptrToHash<4>(uint32_t aHash, uintptr_t aValue)
130 : {
131 : return AddU32ToHash(aHash, static_cast<uint32_t>(aValue));
132 : }
133 :
134 : template<>
135 : inline uint32_t
136 1381555 : AddUintptrToHash<8>(uint32_t aHash, uintptr_t aValue)
137 : {
138 : /*
139 : * The static cast to uint64_t below is necessary because this function
140 : * sometimes gets compiled on 32-bit platforms (yes, even though it's a
141 : * template and we never call this particular override in a 32-bit build). If
142 : * we do aValue >> 32 on a 32-bit machine, we're shifting a 32-bit uintptr_t
143 : * right 32 bits, and the compiler throws an error.
144 : */
145 1381555 : uint32_t v1 = static_cast<uint32_t>(aValue);
146 1381555 : uint32_t v2 = static_cast<uint32_t>(static_cast<uint64_t>(aValue) >> 32);
147 1381555 : return AddU32ToHash(AddU32ToHash(aHash, v1), v2);
148 : }
149 :
150 : } /* namespace detail */
151 :
152 : /**
153 : * AddToHash takes a hash and some values and returns a new hash based on the
154 : * inputs.
155 : *
156 : * Currently, we support hashing uint32_t's, values which we can implicitly
157 : * convert to uint32_t, data pointers, and function pointers.
158 : */
159 : template<typename A>
160 : MOZ_MUST_USE inline uint32_t
161 20964311 : AddToHash(uint32_t aHash, A aA)
162 : {
163 : /*
164 : * Try to convert |A| to uint32_t implicitly. If this works, great. If not,
165 : * we'll error out.
166 : */
167 20964311 : return detail::AddU32ToHash(aHash, aA);
168 : }
169 :
170 : template<typename A>
171 : MOZ_MUST_USE inline uint32_t
172 76760 : AddToHash(uint32_t aHash, A* aA)
173 : {
174 : /*
175 : * You might think this function should just take a void*. But then we'd only
176 : * catch data pointers and couldn't handle function pointers.
177 : */
178 :
179 : static_assert(sizeof(aA) == sizeof(uintptr_t), "Strange pointer!");
180 :
181 76760 : return detail::AddUintptrToHash<sizeof(uintptr_t)>(aHash, uintptr_t(aA));
182 : }
183 :
184 : template<>
185 : MOZ_MUST_USE inline uint32_t
186 1304805 : AddToHash(uint32_t aHash, uintptr_t aA)
187 : {
188 1304805 : return detail::AddUintptrToHash<sizeof(uintptr_t)>(aHash, aA);
189 : }
190 :
191 : template<typename A, typename... Args>
192 : MOZ_MUST_USE uint32_t
193 736426 : AddToHash(uint32_t aHash, A aArg, Args... aArgs)
194 : {
195 736426 : return AddToHash(AddToHash(aHash, aArg), aArgs...);
196 : }
197 :
198 : /**
199 : * The HashGeneric class of functions let you hash one or more values.
200 : *
201 : * If you want to hash together two values x and y, calling HashGeneric(x, y) is
202 : * much better than calling AddToHash(x, y), because AddToHash(x, y) assumes
203 : * that x has already been hashed.
204 : */
205 : template<typename... Args>
206 : MOZ_MUST_USE inline uint32_t
207 23160 : HashGeneric(Args... aArgs)
208 : {
209 23160 : return AddToHash(0, aArgs...);
210 : }
211 :
212 : namespace detail {
213 :
214 : template<typename T>
215 : uint32_t
216 46819 : HashUntilZero(const T* aStr)
217 : {
218 46819 : uint32_t hash = 0;
219 2694363 : for (T c; (c = *aStr); aStr++) {
220 1323772 : hash = AddToHash(hash, c);
221 : }
222 46819 : return hash;
223 : }
224 :
225 : template<typename T>
226 : uint32_t
227 1060332 : HashKnownLength(const T* aStr, size_t aLength)
228 : {
229 1060332 : uint32_t hash = 0;
230 20337149 : for (size_t i = 0; i < aLength; i++) {
231 19276845 : hash = AddToHash(hash, aStr[i]);
232 : }
233 1060304 : return hash;
234 : }
235 :
236 : } /* namespace detail */
237 :
238 : /**
239 : * The HashString overloads below do just what you'd expect.
240 : *
241 : * If you have the string's length, you might as well call the overload which
242 : * includes the length. It may be marginally faster.
243 : */
244 : MOZ_MUST_USE inline uint32_t
245 45701 : HashString(const char* aStr)
246 : {
247 45701 : return detail::HashUntilZero(reinterpret_cast<const unsigned char*>(aStr));
248 : }
249 :
250 : MOZ_MUST_USE inline uint32_t
251 39601 : HashString(const char* aStr, size_t aLength)
252 : {
253 39601 : return detail::HashKnownLength(reinterpret_cast<const unsigned char*>(aStr), aLength);
254 : }
255 :
256 : MOZ_MUST_USE
257 : inline uint32_t
258 773947 : HashString(const unsigned char* aStr, size_t aLength)
259 : {
260 773947 : return detail::HashKnownLength(aStr, aLength);
261 : }
262 :
263 : MOZ_MUST_USE inline uint32_t
264 1118 : HashString(const char16_t* aStr)
265 : {
266 1118 : return detail::HashUntilZero(aStr);
267 : }
268 :
269 : MOZ_MUST_USE inline uint32_t
270 245527 : HashString(const char16_t* aStr, size_t aLength)
271 : {
272 245527 : return detail::HashKnownLength(aStr, aLength);
273 : }
274 :
275 : /*
276 : * On Windows, wchar_t is not the same as char16_t, even though it's
277 : * the same width!
278 : */
279 : #ifdef WIN32
280 : MOZ_MUST_USE inline uint32_t
281 : HashString(const wchar_t* aStr)
282 : {
283 : return detail::HashUntilZero(aStr);
284 : }
285 :
286 : MOZ_MUST_USE inline uint32_t
287 : HashString(const wchar_t* aStr, size_t aLength)
288 : {
289 : return detail::HashKnownLength(aStr, aLength);
290 : }
291 : #endif
292 :
293 : /**
294 : * Hash some number of bytes.
295 : *
296 : * This hash walks word-by-word, rather than byte-by-byte, so you won't get the
297 : * same result out of HashBytes as you would out of HashString.
298 : */
299 : MOZ_MUST_USE extern MFBT_API uint32_t
300 : HashBytes(const void* bytes, size_t aLength);
301 :
302 : /**
303 : * A pseudorandom function mapping 32-bit integers to 32-bit integers.
304 : *
305 : * This is for when you're feeding private data (like pointer values or credit
306 : * card numbers) to a non-crypto hash function (like HashBytes) and then using
307 : * the hash code for something that untrusted parties could observe (like a JS
308 : * Map). Plug in a HashCodeScrambler before that last step to avoid leaking the
309 : * private data.
310 : *
311 : * By itself, this does not prevent hash-flooding DoS attacks, because an
312 : * attacker can still generate many values with exactly equal hash codes by
313 : * attacking the non-crypto hash function alone. Equal hash codes will, of
314 : * course, still be equal however much you scramble them.
315 : *
316 : * The algorithm is SipHash-1-3. See <https://131002.net/siphash/>.
317 : */
318 : class HashCodeScrambler
319 : {
320 : struct SipHasher;
321 :
322 : uint64_t mK0, mK1;
323 :
324 : public:
325 : /** Creates a new scrambler with the given 128-bit key. */
326 439 : constexpr HashCodeScrambler(uint64_t aK0, uint64_t aK1) : mK0(aK0), mK1(aK1) {}
327 :
328 : /**
329 : * Scramble a hash code. Always produces the same result for the same
330 : * combination of key and hash code.
331 : */
332 681 : uint32_t scramble(uint32_t aHashCode) const
333 : {
334 681 : SipHasher hasher(mK0, mK1);
335 681 : return uint32_t(hasher.sipHash(aHashCode));
336 : }
337 :
338 : private:
339 : struct SipHasher
340 : {
341 681 : SipHasher(uint64_t aK0, uint64_t aK1)
342 681 : {
343 : // 1. Initialization.
344 681 : mV0 = aK0 ^ UINT64_C(0x736f6d6570736575);
345 681 : mV1 = aK1 ^ UINT64_C(0x646f72616e646f6d);
346 681 : mV2 = aK0 ^ UINT64_C(0x6c7967656e657261);
347 681 : mV3 = aK1 ^ UINT64_C(0x7465646279746573);
348 681 : }
349 :
350 681 : uint64_t sipHash(uint64_t aM)
351 : {
352 : // 2. Compression.
353 681 : mV3 ^= aM;
354 681 : sipRound();
355 681 : mV0 ^= aM;
356 :
357 : // 3. Finalization.
358 681 : mV2 ^= 0xff;
359 2724 : for (int i = 0; i < 3; i++)
360 2043 : sipRound();
361 681 : return mV0 ^ mV1 ^ mV2 ^ mV3;
362 : }
363 :
364 2724 : void sipRound()
365 : {
366 2724 : mV0 += mV1;
367 2724 : mV1 = RotateLeft(mV1, 13);
368 2724 : mV1 ^= mV0;
369 2724 : mV0 = RotateLeft(mV0, 32);
370 2724 : mV2 += mV3;
371 2724 : mV3 = RotateLeft(mV3, 16);
372 2724 : mV3 ^= mV2;
373 2724 : mV0 += mV3;
374 2724 : mV3 = RotateLeft(mV3, 21);
375 2724 : mV3 ^= mV0;
376 2724 : mV2 += mV1;
377 2724 : mV1 = RotateLeft(mV1, 17);
378 2724 : mV1 ^= mV2;
379 2724 : mV2 = RotateLeft(mV2, 32);
380 2724 : }
381 :
382 : uint64_t mV0, mV1, mV2, mV3;
383 : };
384 : };
385 :
386 : } /* namespace mozilla */
387 : #endif /* __cplusplus */
388 :
389 : #endif /* mozilla_HashFunctions_h */
|