LCOV - code coverage report
Current view: top level - mfbt - HashFunctions.h (source / functions) Hit Total Coverage
Test: output.info Lines: 74 74 100.0 %
Date: 2017-07-14 16:53:18 Functions: 78 122 63.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
       3             : /* This Source Code Form is subject to the terms of the Mozilla Public
       4             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       5             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       6             : 
       7             : /* Utilities for hashing. */
       8             : 
       9             : /*
      10             :  * This file exports functions for hashing data down to a 32-bit value,
      11             :  * including:
      12             :  *
      13             :  *  - HashString    Hash a char* or char16_t/wchar_t* of known or unknown
      14             :  *                  length.
      15             :  *
      16             :  *  - HashBytes     Hash a byte array of known length.
      17             :  *
      18             :  *  - HashGeneric   Hash one or more values.  Currently, we support uint32_t,
      19             :  *                  types which can be implicitly cast to uint32_t, data
      20             :  *                  pointers, and function pointers.
      21             :  *
      22             :  *  - AddToHash     Add one or more values to the given hash.  This supports the
      23             :  *                  same list of types as HashGeneric.
      24             :  *
      25             :  *
      26             :  * You can chain these functions together to hash complex objects.  For example:
      27             :  *
      28             :  *  class ComplexObject
      29             :  *  {
      30             :  *    char* mStr;
      31             :  *    uint32_t mUint1, mUint2;
      32             :  *    void (*mCallbackFn)();
      33             :  *
      34             :  *  public:
      35             :  *    uint32_t hash()
      36             :  *    {
      37             :  *      uint32_t hash = HashString(mStr);
      38             :  *      hash = AddToHash(hash, mUint1, mUint2);
      39             :  *      return AddToHash(hash, mCallbackFn);
      40             :  *    }
      41             :  *  };
      42             :  *
      43             :  * If you want to hash an nsAString or nsACString, use the HashString functions
      44             :  * in nsHashKeys.h.
      45             :  */
      46             : 
      47             : #ifndef mozilla_HashFunctions_h
      48             : #define mozilla_HashFunctions_h
      49             : 
      50             : #include "mozilla/Assertions.h"
      51             : #include "mozilla/Attributes.h"
      52             : #include "mozilla/Char16.h"
      53             : #include "mozilla/MathAlgorithms.h"
      54             : #include "mozilla/Types.h"
      55             : 
      56             : #include <stdint.h>
      57             : 
      58             : #ifdef __cplusplus
      59             : namespace mozilla {
      60             : 
      61             : /**
      62             :  * The golden ratio as a 32-bit fixed-point value.
      63             :  */
      64             : static const uint32_t kGoldenRatioU32 = 0x9E3779B9U;
      65             : 
      66             : inline uint32_t
      67    23724819 : RotateBitsLeft32(uint32_t aValue, uint8_t aBits)
      68             : {
      69    23724819 :   MOZ_ASSERT(aBits < 32);
      70    23724819 :   return (aValue << aBits) | (aValue >> (32 - aBits));
      71             : }
      72             : 
      73             : namespace detail {
      74             : 
      75             : inline uint32_t
      76    23724764 : AddU32ToHash(uint32_t aHash, uint32_t aValue)
      77             : {
      78             :   /*
      79             :    * This is the meat of all our hash routines.  This hash function is not
      80             :    * particularly sophisticated, but it seems to work well for our mostly
      81             :    * plain-text inputs.  Implementation notes follow.
      82             :    *
      83             :    * Our use of the golden ratio here is arbitrary; we could pick almost any
      84             :    * number which:
      85             :    *
      86             :    *  * is odd (because otherwise, all our hash values will be even)
      87             :    *
      88             :    *  * has a reasonably-even mix of 1's and 0's (consider the extreme case
      89             :    *    where we multiply by 0x3 or 0xeffffff -- this will not produce good
      90             :    *    mixing across all bits of the hash).
      91             :    *
      92             :    * The rotation length of 5 is also arbitrary, although an odd number is again
      93             :    * preferable so our hash explores the whole universe of possible rotations.
      94             :    *
      95             :    * Finally, we multiply by the golden ratio *after* xor'ing, not before.
      96             :    * Otherwise, if |aHash| is 0 (as it often is for the beginning of a
      97             :    * message), the expression
      98             :    *
      99             :    *   (kGoldenRatioU32 * RotateBitsLeft(aHash, 5)) |xor| aValue
     100             :    *
     101             :    * evaluates to |aValue|.
     102             :    *
     103             :    * (Number-theoretic aside: Because any odd number |m| is relatively prime to
     104             :    * our modulus (2^32), the list
     105             :    *
     106             :    *    [x * m (mod 2^32) for 0 <= x < 2^32]
     107             :    *
     108             :    * has no duplicate elements.  This means that multiplying by |m| does not
     109             :    * cause us to skip any possible hash values.
     110             :    *
     111             :    * It's also nice if |m| has large-ish order mod 2^32 -- that is, if the
     112             :    * smallest k such that m^k == 1 (mod 2^32) is large -- so we can safely
     113             :    * multiply our hash value by |m| a few times without negating the
     114             :    * multiplicative effect.  Our golden ratio constant has order 2^29, which is
     115             :    * more than enough for our purposes.)
     116             :    */
     117    23724764 :   return kGoldenRatioU32 * (RotateBitsLeft32(aHash, 5) ^ aValue);
     118             : }
     119             : 
     120             : /**
     121             :  * AddUintptrToHash takes sizeof(uintptr_t) as a template parameter.
     122             :  */
     123             : template<size_t PtrSize>
     124             : inline uint32_t
     125             : AddUintptrToHash(uint32_t aHash, uintptr_t aValue);
     126             : 
     127             : template<>
     128             : inline uint32_t
     129             : AddUintptrToHash<4>(uint32_t aHash, uintptr_t aValue)
     130             : {
     131             :   return AddU32ToHash(aHash, static_cast<uint32_t>(aValue));
     132             : }
     133             : 
     134             : template<>
     135             : inline uint32_t
     136     1381555 : AddUintptrToHash<8>(uint32_t aHash, uintptr_t aValue)
     137             : {
     138             :   /*
     139             :    * The static cast to uint64_t below is necessary because this function
     140             :    * sometimes gets compiled on 32-bit platforms (yes, even though it's a
     141             :    * template and we never call this particular override in a 32-bit build).  If
     142             :    * we do aValue >> 32 on a 32-bit machine, we're shifting a 32-bit uintptr_t
     143             :    * right 32 bits, and the compiler throws an error.
     144             :    */
     145     1381555 :   uint32_t v1 = static_cast<uint32_t>(aValue);
     146     1381555 :   uint32_t v2 = static_cast<uint32_t>(static_cast<uint64_t>(aValue) >> 32);
     147     1381555 :   return AddU32ToHash(AddU32ToHash(aHash, v1), v2);
     148             : }
     149             : 
     150             : } /* namespace detail */
     151             : 
     152             : /**
     153             :  * AddToHash takes a hash and some values and returns a new hash based on the
     154             :  * inputs.
     155             :  *
     156             :  * Currently, we support hashing uint32_t's, values which we can implicitly
     157             :  * convert to uint32_t, data pointers, and function pointers.
     158             :  */
     159             : template<typename A>
     160             : MOZ_MUST_USE inline uint32_t
     161    20964311 : AddToHash(uint32_t aHash, A aA)
     162             : {
     163             :   /*
     164             :    * Try to convert |A| to uint32_t implicitly.  If this works, great.  If not,
     165             :    * we'll error out.
     166             :    */
     167    20964311 :   return detail::AddU32ToHash(aHash, aA);
     168             : }
     169             : 
     170             : template<typename A>
     171             : MOZ_MUST_USE inline uint32_t
     172       76760 : AddToHash(uint32_t aHash, A* aA)
     173             : {
     174             :   /*
     175             :    * You might think this function should just take a void*.  But then we'd only
     176             :    * catch data pointers and couldn't handle function pointers.
     177             :    */
     178             : 
     179             :   static_assert(sizeof(aA) == sizeof(uintptr_t), "Strange pointer!");
     180             : 
     181       76760 :   return detail::AddUintptrToHash<sizeof(uintptr_t)>(aHash, uintptr_t(aA));
     182             : }
     183             : 
     184             : template<>
     185             : MOZ_MUST_USE inline uint32_t
     186     1304805 : AddToHash(uint32_t aHash, uintptr_t aA)
     187             : {
     188     1304805 :   return detail::AddUintptrToHash<sizeof(uintptr_t)>(aHash, aA);
     189             : }
     190             : 
     191             : template<typename A, typename... Args>
     192             : MOZ_MUST_USE uint32_t
     193      736426 : AddToHash(uint32_t aHash, A aArg, Args... aArgs)
     194             : {
     195      736426 :   return AddToHash(AddToHash(aHash, aArg), aArgs...);
     196             : }
     197             : 
     198             : /**
     199             :  * The HashGeneric class of functions let you hash one or more values.
     200             :  *
     201             :  * If you want to hash together two values x and y, calling HashGeneric(x, y) is
     202             :  * much better than calling AddToHash(x, y), because AddToHash(x, y) assumes
     203             :  * that x has already been hashed.
     204             :  */
     205             : template<typename... Args>
     206             : MOZ_MUST_USE inline uint32_t
     207       23160 : HashGeneric(Args... aArgs)
     208             : {
     209       23160 :   return AddToHash(0, aArgs...);
     210             : }
     211             : 
     212             : namespace detail {
     213             : 
     214             : template<typename T>
     215             : uint32_t
     216       46819 : HashUntilZero(const T* aStr)
     217             : {
     218       46819 :   uint32_t hash = 0;
     219     2694363 :   for (T c; (c = *aStr); aStr++) {
     220     1323772 :     hash = AddToHash(hash, c);
     221             :   }
     222       46819 :   return hash;
     223             : }
     224             : 
     225             : template<typename T>
     226             : uint32_t
     227     1060332 : HashKnownLength(const T* aStr, size_t aLength)
     228             : {
     229     1060332 :   uint32_t hash = 0;
     230    20337149 :   for (size_t i = 0; i < aLength; i++) {
     231    19276845 :     hash = AddToHash(hash, aStr[i]);
     232             :   }
     233     1060304 :   return hash;
     234             : }
     235             : 
     236             : } /* namespace detail */
     237             : 
     238             : /**
     239             :  * The HashString overloads below do just what you'd expect.
     240             :  *
     241             :  * If you have the string's length, you might as well call the overload which
     242             :  * includes the length.  It may be marginally faster.
     243             :  */
     244             : MOZ_MUST_USE inline uint32_t
     245       45701 : HashString(const char* aStr)
     246             : {
     247       45701 :   return detail::HashUntilZero(reinterpret_cast<const unsigned char*>(aStr));
     248             : }
     249             : 
     250             : MOZ_MUST_USE inline uint32_t
     251       39601 : HashString(const char* aStr, size_t aLength)
     252             : {
     253       39601 :   return detail::HashKnownLength(reinterpret_cast<const unsigned char*>(aStr), aLength);
     254             : }
     255             : 
     256             : MOZ_MUST_USE
     257             : inline uint32_t
     258      773947 : HashString(const unsigned char* aStr, size_t aLength)
     259             : {
     260      773947 :   return detail::HashKnownLength(aStr, aLength);
     261             : }
     262             : 
     263             : MOZ_MUST_USE inline uint32_t
     264        1118 : HashString(const char16_t* aStr)
     265             : {
     266        1118 :   return detail::HashUntilZero(aStr);
     267             : }
     268             : 
     269             : MOZ_MUST_USE inline uint32_t
     270      245527 : HashString(const char16_t* aStr, size_t aLength)
     271             : {
     272      245527 :   return detail::HashKnownLength(aStr, aLength);
     273             : }
     274             : 
     275             : /*
     276             :  * On Windows, wchar_t is not the same as char16_t, even though it's
     277             :  * the same width!
     278             :  */
     279             : #ifdef WIN32
     280             : MOZ_MUST_USE inline uint32_t
     281             : HashString(const wchar_t* aStr)
     282             : {
     283             :   return detail::HashUntilZero(aStr);
     284             : }
     285             : 
     286             : MOZ_MUST_USE inline uint32_t
     287             : HashString(const wchar_t* aStr, size_t aLength)
     288             : {
     289             :   return detail::HashKnownLength(aStr, aLength);
     290             : }
     291             : #endif
     292             : 
     293             : /**
     294             :  * Hash some number of bytes.
     295             :  *
     296             :  * This hash walks word-by-word, rather than byte-by-byte, so you won't get the
     297             :  * same result out of HashBytes as you would out of HashString.
     298             :  */
     299             : MOZ_MUST_USE extern MFBT_API uint32_t
     300             : HashBytes(const void* bytes, size_t aLength);
     301             : 
     302             : /**
     303             :  * A pseudorandom function mapping 32-bit integers to 32-bit integers.
     304             :  *
     305             :  * This is for when you're feeding private data (like pointer values or credit
     306             :  * card numbers) to a non-crypto hash function (like HashBytes) and then using
     307             :  * the hash code for something that untrusted parties could observe (like a JS
     308             :  * Map). Plug in a HashCodeScrambler before that last step to avoid leaking the
     309             :  * private data.
     310             :  *
     311             :  * By itself, this does not prevent hash-flooding DoS attacks, because an
     312             :  * attacker can still generate many values with exactly equal hash codes by
     313             :  * attacking the non-crypto hash function alone. Equal hash codes will, of
     314             :  * course, still be equal however much you scramble them.
     315             :  *
     316             :  * The algorithm is SipHash-1-3. See <https://131002.net/siphash/>.
     317             :  */
     318             : class HashCodeScrambler
     319             : {
     320             :   struct SipHasher;
     321             : 
     322             :   uint64_t mK0, mK1;
     323             : 
     324             : public:
     325             :   /** Creates a new scrambler with the given 128-bit key. */
     326         439 :   constexpr HashCodeScrambler(uint64_t aK0, uint64_t aK1) : mK0(aK0), mK1(aK1) {}
     327             : 
     328             :   /**
     329             :    * Scramble a hash code. Always produces the same result for the same
     330             :    * combination of key and hash code.
     331             :    */
     332         681 :   uint32_t scramble(uint32_t aHashCode) const
     333             :   {
     334         681 :     SipHasher hasher(mK0, mK1);
     335         681 :     return uint32_t(hasher.sipHash(aHashCode));
     336             :   }
     337             : 
     338             : private:
     339             :   struct SipHasher
     340             :   {
     341         681 :     SipHasher(uint64_t aK0, uint64_t aK1)
     342         681 :     {
     343             :       // 1. Initialization.
     344         681 :       mV0 = aK0 ^ UINT64_C(0x736f6d6570736575);
     345         681 :       mV1 = aK1 ^ UINT64_C(0x646f72616e646f6d);
     346         681 :       mV2 = aK0 ^ UINT64_C(0x6c7967656e657261);
     347         681 :       mV3 = aK1 ^ UINT64_C(0x7465646279746573);
     348         681 :     }
     349             : 
     350         681 :     uint64_t sipHash(uint64_t aM)
     351             :     {
     352             :       // 2. Compression.
     353         681 :       mV3 ^= aM;
     354         681 :       sipRound();
     355         681 :       mV0 ^= aM;
     356             : 
     357             :       // 3. Finalization.
     358         681 :       mV2 ^= 0xff;
     359        2724 :       for (int i = 0; i < 3; i++)
     360        2043 :         sipRound();
     361         681 :       return mV0 ^ mV1 ^ mV2 ^ mV3;
     362             :     }
     363             : 
     364        2724 :     void sipRound()
     365             :     {
     366        2724 :       mV0 += mV1;
     367        2724 :       mV1 = RotateLeft(mV1, 13);
     368        2724 :       mV1 ^= mV0;
     369        2724 :       mV0 = RotateLeft(mV0, 32);
     370        2724 :       mV2 += mV3;
     371        2724 :       mV3 = RotateLeft(mV3, 16);
     372        2724 :       mV3 ^= mV2;
     373        2724 :       mV0 += mV3;
     374        2724 :       mV3 = RotateLeft(mV3, 21);
     375        2724 :       mV3 ^= mV0;
     376        2724 :       mV2 += mV1;
     377        2724 :       mV1 = RotateLeft(mV1, 17);
     378        2724 :       mV1 ^= mV2;
     379        2724 :       mV2 = RotateLeft(mV2, 32);
     380        2724 :     }
     381             : 
     382             :     uint64_t mV0, mV1, mV2, mV3;
     383             :   };
     384             : };
     385             : 
     386             : } /* namespace mozilla */
     387             : #endif /* __cplusplus */
     388             : 
     389             : #endif /* mozilla_HashFunctions_h */

Generated by: LCOV version 1.13