|           Line data    Source code 
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : * Copyright (C) 1996-2012, International Business Machines Corporation and
       6             : * others. All Rights Reserved.
       7             : *******************************************************************************
       8             : */
       9             : //===============================================================================
      10             : //
      11             : // File sortkey.cpp
      12             : //
      13             : //
      14             : //
      15             : // Created by: Helena Shih
      16             : //
      17             : // Modification History:
      18             : //
      19             : //  Date         Name          Description
      20             : //
      21             : //  6/20/97      helena        Java class name change.
      22             : //  6/23/97      helena        Added comments to make code more readable.
      23             : //  6/26/98      erm           Changed to use byte arrays instead of UnicodeString
      24             : //  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
      25             : //                             Cleaned up operator=
      26             : // 07/12/99      helena        HPUX 11 CC port.
      27             : // 03/06/01      synwee        Modified compareTo, to handle the result of
      28             : //                             2 string similar in contents, but one is longer
      29             : //                             than the other
      30             : //===============================================================================
      31             : 
      32             : #include "unicode/utypes.h"
      33             : 
      34             : #if !UCONFIG_NO_COLLATION
      35             : 
      36             : #include "unicode/sortkey.h"
      37             : #include "cmemory.h"
      38             : #include "uelement.h"
      39             : #include "ustr_imp.h"
      40             : 
      41             : U_NAMESPACE_BEGIN
      42             : 
      43             : // A hash code of kInvalidHashCode indicates that the hash code needs
      44             : // to be computed. A hash code of kEmptyHashCode is used for empty keys
      45             : // and for any key whose computed hash code is kInvalidHashCode.
      46             : static const int32_t kInvalidHashCode = 0;
      47             : static const int32_t kEmptyHashCode = 1;
      48             : // The "bogus hash code" replaces a separate fBogus flag.
      49             : static const int32_t kBogusHashCode = 2;
      50             : 
      // Expands ICU's RTTI implementation macro for CollationKey.
      // NOTE(review): presumably this defines the class-ID accessors declared
      // in the class header -- confirm against the macro's definition.
      51           0 : UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
      52             : 
      53           0 : CollationKey::CollationKey()
      54             :     : UObject(), fFlagAndLength(0),
      55           0 :       fHashCode(kEmptyHashCode)
      56             : {
      57           0 : }
      58             : 
      59             : // Create a collation key from a bit array.
      60           0 : CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
      61             :     : UObject(), fFlagAndLength(count),
      62           0 :       fHashCode(kInvalidHashCode)
      63             : {
      64           0 :     if (count < 0 || (newValues == NULL && count != 0) ||
      65           0 :             (count > getCapacity() && reallocate(count, 0) == NULL)) {
      66           0 :         setToBogus();
      67           0 :         return;
      68             :     }
      69             : 
      70           0 :     if (count > 0) {
      71           0 :         uprv_memcpy(getBytes(), newValues, count);
      72             :     }
      73             : }
      74             : 
      75           0 : CollationKey::CollationKey(const CollationKey& other)
      76           0 :     : UObject(other), fFlagAndLength(other.getLength()),
      77           0 :       fHashCode(other.fHashCode)
      78             : {
      79           0 :     if (other.isBogus())
      80             :     {
      81           0 :         setToBogus();
      82           0 :         return;
      83             :     }
      84             : 
      85           0 :     int32_t length = fFlagAndLength;
      86           0 :     if (length > getCapacity() && reallocate(length, 0) == NULL) {
      87           0 :         setToBogus();
      88           0 :         return;
      89             :     }
      90             : 
      91           0 :     if (length > 0) {
      92           0 :         uprv_memcpy(getBytes(), other.getBytes(), length);
      93             :     }
      94             : }
      95             : 
      96           0 : CollationKey::~CollationKey()
      97             : {
      98           0 :     if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
      99           0 : }
     100             : 
     101           0 : uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
     102           0 :     uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity));
     103           0 :     if(newBytes == NULL) { return NULL; }
     104           0 :     if(length > 0) {
     105           0 :         uprv_memcpy(newBytes, getBytes(), length);
     106             :     }
     107           0 :     if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
     108           0 :     fUnion.fFields.fBytes = newBytes;
     109           0 :     fUnion.fFields.fCapacity = newCapacity;
     110           0 :     fFlagAndLength |= 0x80000000;
     111           0 :     return newBytes;
     112             : }
     113             : 
     114           0 : void CollationKey::setLength(int32_t newLength) {
     115             :     // U_ASSERT(newLength >= 0 && newLength <= getCapacity());
     116           0 :     fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength;
     117           0 :     fHashCode = kInvalidHashCode;
     118           0 : }
     119             : 
     120             : // set the key to an empty state
     121             : CollationKey&
     122           0 : CollationKey::reset()
     123             : {
     124           0 :     fFlagAndLength &= 0x80000000;
     125           0 :     fHashCode = kEmptyHashCode;
     126             : 
     127           0 :     return *this;
     128             : }
     129             : 
     130             : // set the key to a "bogus" or invalid state
     131             : CollationKey&
     132           0 : CollationKey::setToBogus()
     133             : {
     134           0 :     fFlagAndLength &= 0x80000000;
     135           0 :     fHashCode = kBogusHashCode;
     136             : 
     137           0 :     return *this;
     138             : }
     139             : 
     140             : UBool
     141           0 : CollationKey::operator==(const CollationKey& source) const
     142             : {
     143           0 :     return getLength() == source.getLength() &&
     144           0 :             (this == &source ||
     145           0 :              uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0);
     146             : }
     147             : 
     148             : const CollationKey&
     149           0 : CollationKey::operator=(const CollationKey& other)
     150             : {
     151           0 :     if (this != &other)
     152             :     {
     153           0 :         if (other.isBogus())
     154             :         {
     155           0 :             return setToBogus();
     156             :         }
     157             : 
     158           0 :         int32_t length = other.getLength();
     159           0 :         if (length > getCapacity() && reallocate(length, 0) == NULL) {
     160           0 :             return setToBogus();
     161             :         }
     162           0 :         if (length > 0) {
     163           0 :             uprv_memcpy(getBytes(), other.getBytes(), length);
     164             :         }
     165           0 :         fFlagAndLength = (fFlagAndLength & 0x80000000) | length;
     166           0 :         fHashCode = other.fHashCode;
     167             :     }
     168             : 
     169           0 :     return *this;
     170             : }
     171             : 
     172             : // Bitwise comparison for the collation keys.
     173             : Collator::EComparisonResult
     174           0 : CollationKey::compareTo(const CollationKey& target) const
     175             : {
     176           0 :     UErrorCode errorCode = U_ZERO_ERROR;
     177           0 :     return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode));
     178             : }
     179             : 
     180             : // Bitwise comparison for the collation keys.
     181             : UCollationResult
     182           0 : CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
     183             : {
     184           0 :   if(U_SUCCESS(status)) {
     185           0 :     const uint8_t *src = getBytes();
     186           0 :     const uint8_t *tgt = target.getBytes();
     187             : 
     188             :     // are we comparing the same string
     189           0 :     if (src == tgt)
     190           0 :         return  UCOL_EQUAL;
     191             : 
     192             :     UCollationResult result;
     193             : 
     194             :     // are we comparing different lengths?
     195           0 :     int32_t minLength = getLength();
     196           0 :     int32_t targetLength = target.getLength();
     197           0 :     if (minLength < targetLength) {
     198           0 :         result = UCOL_LESS;
     199           0 :     } else if (minLength == targetLength) {
     200           0 :         result = UCOL_EQUAL;
     201             :     } else {
     202           0 :         minLength = targetLength;
     203           0 :         result = UCOL_GREATER;
     204             :     }
     205             : 
     206           0 :     if (minLength > 0) {
     207           0 :         int diff = uprv_memcmp(src, tgt, minLength);
     208           0 :         if (diff > 0) {
     209           0 :             return UCOL_GREATER;
     210             :         }
     211             :         else
     212           0 :             if (diff < 0) {
     213           0 :                 return UCOL_LESS;
     214             :             }
     215             :     }
     216             : 
     217           0 :     return result;
     218             :   } else {
     219           0 :     return UCOL_EQUAL;
     220             :   }
     221             : }
     222             : 
     223             : #ifdef U_USE_COLLATION_KEY_DEPRECATES
     224             : // Create a copy of the byte array.
     225             : uint8_t*
     226             : CollationKey::toByteArray(int32_t& count) const
     227             : {
     228             :     uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
     229             : 
     230             :     if (result == NULL)
     231             :     {
     232             :         count = 0;
     233             :     }
     234             :     else
     235             :     {
     236             :         count = fCount;
     237             :         if (count > 0) {
     238             :             uprv_memcpy(result, fBytes, fCount);
     239             :         }
     240             :     }
     241             : 
     242             :     return result;
     243             : }
     244             : #endif
     245             : 
     246             : static int32_t
     247           0 : computeHashCode(const uint8_t *key, int32_t  length) {
     248           0 :     const char *s = reinterpret_cast<const char *>(key);
     249             :     int32_t hash;
     250           0 :     if (s == NULL || length == 0) {
     251           0 :         hash = kEmptyHashCode;
     252             :     } else {
     253           0 :         hash = ustr_hashCharsN(s, length);
     254           0 :         if (hash == kInvalidHashCode || hash == kBogusHashCode) {
     255           0 :             hash = kEmptyHashCode;
     256             :         }
     257             :     }
     258           0 :     return hash;
     259             : }
     260             : 
     261             : int32_t
     262           0 : CollationKey::hashCode() const
     263             : {
     264             :     // (Cribbed from UnicodeString)
     265             :     // We cache the hashCode; when it becomes invalid, due to any change to the
     266             :     // string, we note this by setting it to kInvalidHashCode. [LIU]
     267             : 
     268             :     // Note: This method is semantically const, but physically non-const.
     269             : 
     270           0 :     if (fHashCode == kInvalidHashCode)
     271             :     {
     272           0 :         fHashCode = computeHashCode(getBytes(), getLength());
     273             :     }
     274             : 
     275           0 :     return fHashCode;
     276             : }
     277             : 
     278             : U_NAMESPACE_END
     279             : 
     280             : U_CAPI int32_t U_EXPORT2
     281           0 : ucol_keyHashCode(const uint8_t *key, 
     282             :                        int32_t  length)
     283             : {
     284           0 :     return icu::computeHashCode(key, length);
     285             : }
     286             : 
     287             : #endif /* #if !UCONFIG_NO_COLLATION */
 |