LCOV - code coverage report
Current view: top level - intl/icu/source/common - unistr.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 847 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 103 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : ******************************************************************************
       5             : * Copyright (C) 1999-2016, International Business Machines Corporation and
       6             : * others. All Rights Reserved.
       7             : ******************************************************************************
       8             : *
       9             : * File unistr.cpp
      10             : *
      11             : * Modification History:
      12             : *
      13             : *   Date        Name        Description
      14             : *   09/25/98    stephen     Creation.
      15             : *   04/20/99    stephen     Overhauled per 4/16 code review.
      16             : *   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
      17             : *   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
      18             : *                           Replaceable.
      19             : *   06/25/01    grhoten     Removed the dependency on iostream
      20             : ******************************************************************************
      21             : */
      22             : 
      23             : #include "unicode/utypes.h"
      24             : #include "unicode/appendable.h"
      25             : #include "unicode/putil.h"
      26             : #include "cstring.h"
      27             : #include "cmemory.h"
      28             : #include "unicode/ustring.h"
      29             : #include "unicode/unistr.h"
      30             : #include "unicode/utf.h"
      31             : #include "unicode/utf16.h"
      32             : #include "uelement.h"
      33             : #include "ustr_imp.h"
      34             : #include "umutex.h"
      35             : #include "uassert.h"
      36             : 
      37             : #if 0
      38             : 
      39             : #include <iostream>
      40             : using namespace std;
      41             : 
      42             : //DEBUGGING
      43             : void
      44             : print(const UnicodeString& s,
      45             :       const char *name)
      46             : {
      47             :   UChar c;
      48             :   cout << name << ":|";
      49             :   for(int i = 0; i < s.length(); ++i) {
      50             :     c = s[i];
      51             :     if(c>= 0x007E || c < 0x0020)
      52             :       cout << "[0x" << hex << s[i] << "]";
      53             :     else
      54             :       cout << (char) s[i];
      55             :   }
      56             :   cout << '|' << endl;
      57             : }
      58             : 
      59             : void
      60             : print(const UChar *s,
      61             :       int32_t len,
      62             :       const char *name)
      63             : {
      64             :   UChar c;
      65             :   cout << name << ":|";
      66             :   for(int i = 0; i < len; ++i) {
      67             :     c = s[i];
      68             :     if(c>= 0x007E || c < 0x0020)
      69             :       cout << "[0x" << hex << s[i] << "]";
      70             :     else
      71             :       cout << (char) s[i];
      72             :   }
      73             :   cout << '|' << endl;
      74             : }
      75             : // END DEBUGGING
      76             : #endif
      77             : 
      78             : // Local function definitions for now
      79             : 
      80             : // need to copy areas that may overlap
      81             : static
      82             : inline void
      83           0 : us_arrayCopy(const UChar *src, int32_t srcStart,
      84             :          UChar *dst, int32_t dstStart, int32_t count)
      85             : {
      86           0 :   if(count>0) {
      87           0 :     uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src));
      88             :   }
      89           0 : }
      90             : 
      91             : // u_unescapeAt() callback to get a UChar from a UnicodeString
      92             : U_CDECL_BEGIN
      93             : static UChar U_CALLCONV
      94           0 : UnicodeString_charAt(int32_t offset, void *context) {
      95           0 :     return ((icu::UnicodeString*) context)->charAt(offset);
      96             : }
      97             : U_CDECL_END
      98             : 
      99             : U_NAMESPACE_BEGIN
     100             : 
     101             : /* The Replaceable virtual destructor can't be defined in the header
     102             :    due to how AIX works with multiple definitions of virtual functions.
     103             : */
     104           0 : Replaceable::~Replaceable() {}
     105             : 
     106           0 : UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
     107             : 
     108             : UnicodeString U_EXPORT2
     109           0 : operator+ (const UnicodeString &s1, const UnicodeString &s2) {
     110             :     return
     111           0 :         UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
     112           0 :             append(s1).
     113           0 :                 append(s2);
     114             : }
     115             : 
     116             : //========================================
     117             : // Reference Counting functions, put at top of file so that optimizing compilers
     118             : //                               have a chance to automatically inline.
     119             : //========================================
     120             : 
     121             : void
     122           0 : UnicodeString::addRef() {
     123           0 :   umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
     124           0 : }
     125             : 
     126             : int32_t
     127           0 : UnicodeString::removeRef() {
     128           0 :   return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
     129             : }
     130             : 
     131             : int32_t
     132           0 : UnicodeString::refCount() const {
     133           0 :   return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
     134             : }
     135             : 
     136             : void
     137           0 : UnicodeString::releaseArray() {
     138           0 :   if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) {
     139           0 :     uprv_free((int32_t *)fUnion.fFields.fArray - 1);
     140             :   }
     141           0 : }
     142             : 
     143             : 
     144             : 
     145             : //========================================
     146             : // Constructors
     147             : //========================================
     148             : 
     149             : // The default constructor is inline in unistr.h.
     150             : 
     151           0 : UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
     152           0 :   fUnion.fFields.fLengthAndFlags = 0;
     153           0 :   if(count <= 0 || (uint32_t)c > 0x10ffff) {
     154             :     // just allocate and do not do anything else
     155           0 :     allocate(capacity);
     156           0 :   } else if(c <= 0xffff) {
     157           0 :     int32_t length = count;
     158           0 :     if(capacity < length) {
     159           0 :       capacity = length;
     160             :     }
     161           0 :     if(allocate(capacity)) {
     162           0 :       UChar *array = getArrayStart();
     163           0 :       UChar unit = (UChar)c;
     164           0 :       for(int32_t i = 0; i < length; ++i) {
     165           0 :         array[i] = unit;
     166             :       }
     167           0 :       setLength(length);
     168             :     }
     169             :   } else {  // supplementary code point, write surrogate pairs
     170           0 :     if(count > (INT32_MAX / 2)) {
     171             :       // We would get more than 2G UChars.
     172           0 :       allocate(capacity);
     173           0 :       return;
     174             :     }
     175           0 :     int32_t length = count * 2;
     176           0 :     if(capacity < length) {
     177           0 :       capacity = length;
     178             :     }
     179           0 :     if(allocate(capacity)) {
     180           0 :       UChar *array = getArrayStart();
     181           0 :       UChar lead = U16_LEAD(c);
     182           0 :       UChar trail = U16_TRAIL(c);
     183           0 :       for(int32_t i = 0; i < length; i += 2) {
     184           0 :         array[i] = lead;
     185           0 :         array[i + 1] = trail;
     186             :       }
     187           0 :       setLength(length);
     188             :     }
     189             :   }
     190             : }
     191             : 
     192           0 : UnicodeString::UnicodeString(UChar ch) {
     193           0 :   fUnion.fFields.fLengthAndFlags = kLength1 | kShortString;
     194           0 :   fUnion.fStackFields.fBuffer[0] = ch;
     195           0 : }
     196             : 
     197           0 : UnicodeString::UnicodeString(UChar32 ch) {
     198           0 :   fUnion.fFields.fLengthAndFlags = kShortString;
     199           0 :   int32_t i = 0;
     200           0 :   UBool isError = FALSE;
     201           0 :   U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError);
     202             :   // We test isError so that the compiler does not complain that we don't.
     203             :   // If isError then i==0 which is what we want anyway.
     204           0 :   if(!isError) {
     205           0 :     setShortLength(i);
     206             :   }
     207           0 : }
     208             : 
     209           0 : UnicodeString::UnicodeString(const UChar *text) {
     210           0 :   fUnion.fFields.fLengthAndFlags = kShortString;
     211           0 :   doAppend(text, 0, -1);
     212           0 : }
     213             : 
     214           0 : UnicodeString::UnicodeString(const UChar *text,
     215           0 :                              int32_t textLength) {
     216           0 :   fUnion.fFields.fLengthAndFlags = kShortString;
     217           0 :   doAppend(text, 0, textLength);
     218           0 : }
     219             : 
     220           0 : UnicodeString::UnicodeString(UBool isTerminated,
     221             :                              ConstChar16Ptr textPtr,
     222           0 :                              int32_t textLength) {
     223           0 :   fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
     224           0 :   const UChar *text = textPtr;
     225           0 :   if(text == NULL) {
     226             :     // treat as an empty string, do not alias
     227           0 :     setToEmpty();
     228           0 :   } else if(textLength < -1 ||
     229           0 :             (textLength == -1 && !isTerminated) ||
     230           0 :             (textLength >= 0 && isTerminated && text[textLength] != 0)
     231             :   ) {
     232           0 :     setToBogus();
     233             :   } else {
     234           0 :     if(textLength == -1) {
     235             :       // text is terminated, or else it would have failed the above test
     236           0 :       textLength = u_strlen(text);
     237             :     }
     238           0 :     setArray(const_cast<UChar *>(text), textLength,
     239           0 :              isTerminated ? textLength + 1 : textLength);
     240             :   }
     241           0 : }
     242             : 
     243           0 : UnicodeString::UnicodeString(UChar *buff,
     244             :                              int32_t buffLength,
     245           0 :                              int32_t buffCapacity) {
     246           0 :   fUnion.fFields.fLengthAndFlags = kWritableAlias;
     247           0 :   if(buff == NULL) {
     248             :     // treat as an empty string, do not alias
     249           0 :     setToEmpty();
     250           0 :   } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
     251           0 :     setToBogus();
     252             :   } else {
     253           0 :     if(buffLength == -1) {
     254             :       // fLength = u_strlen(buff); but do not look beyond buffCapacity
     255           0 :       const UChar *p = buff, *limit = buff + buffCapacity;
     256           0 :       while(p != limit && *p != 0) {
     257           0 :         ++p;
     258             :       }
     259           0 :       buffLength = (int32_t)(p - buff);
     260             :     }
     261           0 :     setArray(buff, buffLength, buffCapacity);
     262             :   }
     263           0 : }
     264             : 
     265           0 : UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
     266           0 :   fUnion.fFields.fLengthAndFlags = kShortString;
     267           0 :   if(src==NULL) {
     268             :     // treat as an empty string
     269             :   } else {
     270           0 :     if(length<0) {
     271           0 :       length=(int32_t)uprv_strlen(src);
     272             :     }
     273           0 :     if(cloneArrayIfNeeded(length, length, FALSE)) {
     274           0 :       u_charsToUChars(src, getArrayStart(), length);
     275           0 :       setLength(length);
     276             :     } else {
     277           0 :       setToBogus();
     278             :     }
     279             :   }
     280           0 : }
     281             : 
     282             : #if U_CHARSET_IS_UTF8
     283             : 
     284           0 : UnicodeString::UnicodeString(const char *codepageData) {
     285           0 :   fUnion.fFields.fLengthAndFlags = kShortString;
     286           0 :   if(codepageData != 0) {
     287           0 :     setToUTF8(codepageData);
     288             :   }
     289           0 : }
     290             : 
     291           0 : UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
     292           0 :   fUnion.fFields.fLengthAndFlags = kShortString;
     293             :   // if there's nothing to convert, do nothing
     294           0 :   if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
     295           0 :     return;
     296             :   }
     297           0 :   if(dataLength == -1) {
     298           0 :     dataLength = (int32_t)uprv_strlen(codepageData);
     299             :   }
     300           0 :   setToUTF8(StringPiece(codepageData, dataLength));
     301             : }
     302             : 
     303             : // else see unistr_cnv.cpp
     304             : #endif
     305             : 
     306           0 : UnicodeString::UnicodeString(const UnicodeString& that) {
     307           0 :   fUnion.fFields.fLengthAndFlags = kShortString;
     308           0 :   copyFrom(that);
     309           0 : }
     310             : 
     311             : #if U_HAVE_RVALUE_REFERENCES
     312           0 : UnicodeString::UnicodeString(UnicodeString &&src) U_NOEXCEPT {
     313           0 :   fUnion.fFields.fLengthAndFlags = kShortString;
     314           0 :   moveFrom(src);
     315           0 : }
     316             : #endif
     317             : 
     318           0 : UnicodeString::UnicodeString(const UnicodeString& that,
     319           0 :                              int32_t srcStart) {
     320           0 :   fUnion.fFields.fLengthAndFlags = kShortString;
     321           0 :   setTo(that, srcStart);
     322           0 : }
     323             : 
     324           0 : UnicodeString::UnicodeString(const UnicodeString& that,
     325             :                              int32_t srcStart,
     326           0 :                              int32_t srcLength) {
     327           0 :   fUnion.fFields.fLengthAndFlags = kShortString;
     328           0 :   setTo(that, srcStart, srcLength);
     329           0 : }
     330             : 
     331             : // Replaceable base class clone() default implementation, does not clone
     332             : Replaceable *
     333           0 : Replaceable::clone() const {
     334           0 :   return NULL;
     335             : }
     336             : 
     337             : // UnicodeString overrides clone() with a real implementation
     338             : Replaceable *
     339           0 : UnicodeString::clone() const {
     340           0 :   return new UnicodeString(*this);
     341             : }
     342             : 
     343             : //========================================
     344             : // array allocation
     345             : //========================================
     346             : 
     347             : namespace {
     348             : 
     349             : const int32_t kGrowSize = 128;
     350             : 
     351             : // The number of bytes for one int32_t reference counter and capacity UChars
     352             : // must fit into a 32-bit size_t (at least when on a 32-bit platform).
     353             : // We also add one for the NUL terminator, to avoid reallocation in getTerminatedBuffer(),
     354             : // and round up to a multiple of 16 bytes.
     355             : // This means that capacity must be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
     356             : // (With more complicated checks we could go up to 0x7ffffffd without rounding up,
     357             : // but that does not seem worth it.)
     358             : const int32_t kMaxCapacity = 0x7ffffff5;
     359             : 
     360           0 : int32_t getGrowCapacity(int32_t newLength) {
     361           0 :   int32_t growSize = (newLength >> 2) + kGrowSize;
     362           0 :   if(growSize <= (kMaxCapacity - newLength)) {
     363           0 :     return newLength + growSize;
     364             :   } else {
     365           0 :     return kMaxCapacity;
     366             :   }
     367             : }
     368             : 
     369             : }  // namespace
     370             : 
     371             : UBool
     372           0 : UnicodeString::allocate(int32_t capacity) {
     373           0 :   if(capacity <= US_STACKBUF_SIZE) {
     374           0 :     fUnion.fFields.fLengthAndFlags = kShortString;
     375           0 :     return TRUE;
     376             :   }
     377           0 :   if(capacity <= kMaxCapacity) {
     378           0 :     ++capacity;  // for the NUL
     379             :     // Switch to size_t which is unsigned so that we can allocate up to 4GB.
     380             :     // Reference counter + UChars.
     381           0 :     size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
     382             :     // Round up to a multiple of 16.
     383           0 :     numBytes = (numBytes + 15) & ~15;
     384           0 :     int32_t *array = (int32_t *) uprv_malloc(numBytes);
     385           0 :     if(array != NULL) {
     386             :       // set initial refCount and point behind the refCount
     387           0 :       *array++ = 1;
     388           0 :       numBytes -= sizeof(int32_t);
     389             : 
     390             :       // have fArray point to the first UChar
     391           0 :       fUnion.fFields.fArray = (UChar *)array;
     392           0 :       fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR);
     393           0 :       fUnion.fFields.fLengthAndFlags = kLongString;
     394           0 :       return TRUE;
     395             :     }
     396             :   }
     397           0 :   fUnion.fFields.fLengthAndFlags = kIsBogus;
     398           0 :   fUnion.fFields.fArray = 0;
     399           0 :   fUnion.fFields.fCapacity = 0;
     400           0 :   return FALSE;
     401             : }
     402             : 
     403             : //========================================
     404             : // Destructor
     405             : //========================================
     406             : 
     407             : #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
     408             : static u_atomic_int32_t finalLengthCounts[0x400];  // UnicodeString::kMaxShortLength+1
     409             : static u_atomic_int32_t beyondCount(0);
     410             : 
     411             : U_CAPI void unistr_printLengths() {
     412             :   int32_t i;
     413             :   for(i = 0; i <= 59; ++i) {
     414             :     printf("%2d,  %9d\n", i, (int32_t)finalLengthCounts[i]);
     415             :   }
     416             :   int32_t beyond = beyondCount;
     417             :   for(; i < UPRV_LENGTHOF(finalLengthCounts); ++i) {
     418             :     beyond += finalLengthCounts[i];
     419             :   }
     420             :   printf(">59, %9d\n", beyond);
     421             : }
     422             : #endif
     423             : 
     424           0 : UnicodeString::~UnicodeString()
     425             : {
     426             : #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
     427             :   // Count lengths of strings at the end of their lifetime.
     428             :   // Useful for discussion of a desirable stack buffer size.
     429             :   // Count the contents length, not the optional NUL terminator nor further capacity.
     430             :   // Ignore open-buffer strings and strings which alias external storage.
     431             :   if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kReadonlyAlias|kWritableAlias)) == 0) {
     432             :     if(hasShortLength()) {
     433             :       umtx_atomic_inc(finalLengthCounts + getShortLength());
     434             :     } else {
     435             :       umtx_atomic_inc(&beyondCount);
     436             :     }
     437             :   }
     438             : #endif
     439             : 
     440           0 :   releaseArray();
     441           0 : }
     442             : 
     443             : //========================================
     444             : // Factory methods
     445             : //========================================
     446             : 
     447           0 : UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
     448           0 :   UnicodeString result;
     449           0 :   result.setToUTF8(utf8);
     450           0 :   return result;
     451             : }
     452             : 
     453           0 : UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
     454           0 :   UnicodeString result;
     455             :   int32_t capacity;
     456             :   // Most UTF-32 strings will be BMP-only and result in a same-length
     457             :   // UTF-16 string. We overestimate the capacity just slightly,
     458             :   // just in case there are a few supplementary characters.
     459           0 :   if(length <= US_STACKBUF_SIZE) {
     460           0 :     capacity = US_STACKBUF_SIZE;
     461             :   } else {
     462           0 :     capacity = length + (length >> 4) + 4;
     463             :   }
     464             :   do {
     465           0 :     UChar *utf16 = result.getBuffer(capacity);
     466             :     int32_t length16;
     467           0 :     UErrorCode errorCode = U_ZERO_ERROR;
     468           0 :     u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
     469             :         utf32, length,
     470             :         0xfffd,  // Substitution character.
     471             :         NULL,    // Don't care about number of substitutions.
     472           0 :         &errorCode);
     473           0 :     result.releaseBuffer(length16);
     474           0 :     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
     475           0 :       capacity = length16 + 1;  // +1 for the terminating NUL.
     476           0 :       continue;
     477           0 :     } else if(U_FAILURE(errorCode)) {
     478           0 :       result.setToBogus();
     479             :     }
     480           0 :     break;
     481             :   } while(TRUE);
     482           0 :   return result;
     483             : }
     484             : 
     485             : //========================================
     486             : // Assignment
     487             : //========================================
     488             : 
     489             : UnicodeString &
     490           0 : UnicodeString::operator=(const UnicodeString &src) {
     491           0 :   return copyFrom(src);
     492             : }
     493             : 
     494             : UnicodeString &
     495           0 : UnicodeString::fastCopyFrom(const UnicodeString &src) {
     496           0 :   return copyFrom(src, TRUE);
     497             : }
     498             : 
     499             : UnicodeString &
     500           0 : UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
     501             :   // if assigning to ourselves, do nothing
     502           0 :   if(this == &src) {
     503           0 :     return *this;
     504             :   }
     505             : 
     506             :   // is the right side bogus?
     507           0 :   if(src.isBogus()) {
     508           0 :     setToBogus();
     509           0 :     return *this;
     510             :   }
     511             : 
     512             :   // delete the current contents
     513           0 :   releaseArray();
     514             : 
     515           0 :   if(src.isEmpty()) {
     516             :     // empty string - use the stack buffer
     517           0 :     setToEmpty();
     518           0 :     return *this;
     519             :   }
     520             : 
     521             :   // fLength>0 and not an "open" src.getBuffer(minCapacity)
     522           0 :   fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
     523           0 :   switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) {
     524             :   case kShortString:
     525             :     // short string using the stack buffer, do the same
     526           0 :     uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
     527           0 :                 getShortLength() * U_SIZEOF_UCHAR);
     528           0 :     break;
     529             :   case kLongString:
     530             :     // src uses a refCounted string buffer, use that buffer with refCount
     531             :     // src is const, use a cast - we don't actually change it
     532           0 :     ((UnicodeString &)src).addRef();
     533             :     // copy all fields, share the reference-counted buffer
     534           0 :     fUnion.fFields.fArray = src.fUnion.fFields.fArray;
     535           0 :     fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
     536           0 :     if(!hasShortLength()) {
     537           0 :       fUnion.fFields.fLength = src.fUnion.fFields.fLength;
     538             :     }
     539           0 :     break;
     540             :   case kReadonlyAlias:
     541           0 :     if(fastCopy) {
     542             :       // src is a readonly alias, do the same
     543             :       // -> maintain the readonly alias as such
     544           0 :       fUnion.fFields.fArray = src.fUnion.fFields.fArray;
     545           0 :       fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
     546           0 :       if(!hasShortLength()) {
     547           0 :         fUnion.fFields.fLength = src.fUnion.fFields.fLength;
     548             :       }
     549           0 :       break;
     550             :     }
     551             :     // else if(!fastCopy) fall through to case kWritableAlias
     552             :     // -> allocate a new buffer and copy the contents
     553             :     U_FALLTHROUGH;
     554             :   case kWritableAlias: {
     555             :     // src is a writable alias; we make a copy of that instead
     556           0 :     int32_t srcLength = src.length();
     557           0 :     if(allocate(srcLength)) {
     558           0 :       u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
     559           0 :       setLength(srcLength);
     560           0 :       break;
     561             :     }
     562             :     // if there is not enough memory, then fall through to setting to bogus
     563             :     U_FALLTHROUGH;
     564             :   }
     565             :   default:
     566             :     // if src is bogus, set ourselves to bogus
     567             :     // do not call setToBogus() here because fArray and flags are not consistent here
     568           0 :     fUnion.fFields.fLengthAndFlags = kIsBogus;
     569           0 :     fUnion.fFields.fArray = 0;
     570           0 :     fUnion.fFields.fCapacity = 0;
     571           0 :     break;
     572             :   }
     573             : 
     574           0 :   return *this;
     575             : }
     576             : 
     577           0 : UnicodeString &UnicodeString::moveFrom(UnicodeString &src) U_NOEXCEPT {
     578             :   // No explicit check for self move assignment, consistent with standard library.
     579             :   // Self move assignment causes no crash nor leak but might make the object bogus.
     580           0 :   releaseArray();
     581           0 :   copyFieldsFrom(src, TRUE);
     582           0 :   return *this;
     583             : }
     584             : 
     585             : // Same as moveFrom() except without memory management.
     586           0 : void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT {
     587           0 :   int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
     588           0 :   if(lengthAndFlags & kUsingStackBuffer) {
     589             :     // Short string using the stack buffer, copy the contents.
     590             :     // Check for self assignment to prevent "overlap in memcpy" warnings,
     591             :     // although it should be harmless to copy a buffer to itself exactly.
     592           0 :     if(this != &src) {
     593           0 :       uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
     594           0 :                   getShortLength() * U_SIZEOF_UCHAR);
     595             :     }
     596             :   } else {
     597             :     // In all other cases, copy all fields.
     598           0 :     fUnion.fFields.fArray = src.fUnion.fFields.fArray;
     599           0 :     fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
     600           0 :     if(!hasShortLength()) {
     601           0 :       fUnion.fFields.fLength = src.fUnion.fFields.fLength;
     602             :     }
     603           0 :     if(setSrcToBogus) {
     604             :       // Set src to bogus without releasing any memory.
     605           0 :       src.fUnion.fFields.fLengthAndFlags = kIsBogus;
     606           0 :       src.fUnion.fFields.fArray = NULL;
     607           0 :       src.fUnion.fFields.fCapacity = 0;
     608             :     }
     609             :   }
     610           0 : }
     611             : 
     612           0 : void UnicodeString::swap(UnicodeString &other) U_NOEXCEPT {
     613           0 :   UnicodeString temp;  // Empty short string: Known not to need releaseArray().
     614             :   // Copy fields without resetting source values in between.
     615           0 :   temp.copyFieldsFrom(*this, FALSE);
     616           0 :   this->copyFieldsFrom(other, FALSE);
     617           0 :   other.copyFieldsFrom(temp, FALSE);
     618             :   // Set temp to an empty string so that other's memory is not released twice.
     619           0 :   temp.fUnion.fFields.fLengthAndFlags = kShortString;
     620           0 : }
     621             : 
     622             : //========================================
     623             : // Miscellaneous operations
     624             : //========================================
     625             : 
     626           0 : UnicodeString UnicodeString::unescape() const {
     627           0 :     UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
     628           0 :     if (result.isBogus()) {
     629           0 :         return result;
     630             :     }
     631           0 :     const UChar *array = getBuffer();
     632           0 :     int32_t len = length();
     633           0 :     int32_t prev = 0;
     634           0 :     for (int32_t i=0;;) {
     635           0 :         if (i == len) {
     636           0 :             result.append(array, prev, len - prev);
     637           0 :             break;
     638             :         }
     639           0 :         if (array[i++] == 0x5C /*'\\'*/) {
     640           0 :             result.append(array, prev, (i - 1) - prev);
     641           0 :             UChar32 c = unescapeAt(i); // advances i
     642           0 :             if (c < 0) {
     643           0 :                 result.remove(); // return empty string
     644           0 :                 break; // invalid escape sequence
     645             :             }
     646           0 :             result.append(c);
     647           0 :             prev = i;
     648             :         }
     649           0 :     }
     650           0 :     return result;
     651             : }
     652             : 
     653           0 : UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
     654           0 :     return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
     655             : }
     656             : 
     657             : //========================================
     658             : // Read-only implementation
     659             : //========================================
     660             : UBool
     661           0 : UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
     662             :   // Requires: this & text not bogus and have same lengths.
     663             :   // Byte-wise comparison works for equality regardless of endianness.
     664           0 :   return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
     665             : }
     666             : 
     667             : int8_t
     668           0 : UnicodeString::doCompare( int32_t start,
     669             :               int32_t length,
     670             :               const UChar *srcChars,
     671             :               int32_t srcStart,
     672             :               int32_t srcLength) const
     673             : {
     674             :   // compare illegal string values
     675           0 :   if(isBogus()) {
     676           0 :     return -1;
     677             :   }
     678             :   
     679             :   // pin indices to legal values
     680           0 :   pinIndices(start, length);
     681             : 
     682           0 :   if(srcChars == NULL) {
     683             :     // treat const UChar *srcChars==NULL as an empty string
     684           0 :     return length == 0 ? 0 : 1;
     685             :   }
     686             : 
     687             :   // get the correct pointer
     688           0 :   const UChar *chars = getArrayStart();
     689             : 
     690           0 :   chars += start;
     691           0 :   srcChars += srcStart;
     692             : 
     693             :   int32_t minLength;
     694             :   int8_t lengthResult;
     695             : 
     696             :   // get the srcLength if necessary
     697           0 :   if(srcLength < 0) {
     698           0 :     srcLength = u_strlen(srcChars + srcStart);
     699             :   }
     700             : 
     701             :   // are we comparing different lengths?
     702           0 :   if(length != srcLength) {
     703           0 :     if(length < srcLength) {
     704           0 :       minLength = length;
     705           0 :       lengthResult = -1;
     706             :     } else {
     707           0 :       minLength = srcLength;
     708           0 :       lengthResult = 1;
     709             :     }
     710             :   } else {
     711           0 :     minLength = length;
     712           0 :     lengthResult = 0;
     713             :   }
     714             : 
     715             :   /*
     716             :    * note that uprv_memcmp() returns an int but we return an int8_t;
     717             :    * we need to take care not to truncate the result -
     718             :    * one way to do this is to right-shift the value to
     719             :    * move the sign bit into the lower 8 bits and making sure that this
     720             :    * does not become 0 itself
     721             :    */
     722             : 
     723           0 :   if(minLength > 0 && chars != srcChars) {
     724             :     int32_t result;
     725             : 
     726             : #   if U_IS_BIG_ENDIAN 
     727             :       // big-endian: byte comparison works
     728             :       result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
     729             :       if(result != 0) {
     730             :         return (int8_t)(result >> 15 | 1);
     731             :       }
     732             : #   else
     733             :       // little-endian: compare UChar units
     734           0 :       do {
     735           0 :         result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
     736           0 :         if(result != 0) {
     737           0 :           return (int8_t)(result >> 15 | 1);
     738             :         }
     739             :       } while(--minLength > 0);
     740             : #   endif
     741             :   }
     742           0 :   return lengthResult;
     743             : }
     744             : 
     745             : /* String compare in code point order - doCompare() compares in code unit order. */
     746             : int8_t
     747           0 : UnicodeString::doCompareCodePointOrder(int32_t start,
     748             :                                        int32_t length,
     749             :                                        const UChar *srcChars,
     750             :                                        int32_t srcStart,
     751             :                                        int32_t srcLength) const
     752             : {
     753             :   // compare illegal string values
     754             :   // treat const UChar *srcChars==NULL as an empty string
     755           0 :   if(isBogus()) {
     756           0 :     return -1;
     757             :   }
     758             : 
     759             :   // pin indices to legal values
     760           0 :   pinIndices(start, length);
     761             : 
     762           0 :   if(srcChars == NULL) {
     763           0 :     srcStart = srcLength = 0;
     764             :   }
     765             : 
     766           0 :   int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
     767             :   /* translate the 32-bit result into an 8-bit one */
     768           0 :   if(diff!=0) {
     769           0 :     return (int8_t)(diff >> 15 | 1);
     770             :   } else {
     771           0 :     return 0;
     772             :   }
     773             : }
     774             : 
     775             : int32_t
     776           0 : UnicodeString::getLength() const {
     777           0 :     return length();
     778             : }
     779             : 
     780             : UChar
     781           0 : UnicodeString::getCharAt(int32_t offset) const {
     782           0 :   return charAt(offset);
     783             : }
     784             : 
     785             : UChar32
     786           0 : UnicodeString::getChar32At(int32_t offset) const {
     787           0 :   return char32At(offset);
     788             : }
     789             : 
     790             : UChar32
     791           0 : UnicodeString::char32At(int32_t offset) const
     792             : {
     793           0 :   int32_t len = length();
     794           0 :   if((uint32_t)offset < (uint32_t)len) {
     795           0 :     const UChar *array = getArrayStart();
     796             :     UChar32 c;
     797           0 :     U16_GET(array, 0, offset, len, c);
     798           0 :     return c;
     799             :   } else {
     800           0 :     return kInvalidUChar;
     801             :   }
     802             : }
     803             : 
     804             : int32_t
     805           0 : UnicodeString::getChar32Start(int32_t offset) const {
     806           0 :   if((uint32_t)offset < (uint32_t)length()) {
     807           0 :     const UChar *array = getArrayStart();
     808           0 :     U16_SET_CP_START(array, 0, offset);
     809           0 :     return offset;
     810             :   } else {
     811           0 :     return 0;
     812             :   }
     813             : }
     814             : 
     815             : int32_t
     816           0 : UnicodeString::getChar32Limit(int32_t offset) const {
     817           0 :   int32_t len = length();
     818           0 :   if((uint32_t)offset < (uint32_t)len) {
     819           0 :     const UChar *array = getArrayStart();
     820           0 :     U16_SET_CP_LIMIT(array, 0, offset, len);
     821           0 :     return offset;
     822             :   } else {
     823           0 :     return len;
     824             :   }
     825             : }
     826             : 
     827             : int32_t
     828           0 : UnicodeString::countChar32(int32_t start, int32_t length) const {
     829           0 :   pinIndices(start, length);
     830             :   // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
     831           0 :   return u_countChar32(getArrayStart()+start, length);
     832             : }
     833             : 
     834             : UBool
     835           0 : UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
     836           0 :   pinIndices(start, length);
     837             :   // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
     838           0 :   return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
     839             : }
     840             : 
     841             : int32_t
     842           0 : UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
     843             :   // pin index
     844           0 :   int32_t len = length();
     845           0 :   if(index<0) {
     846           0 :     index=0;
     847           0 :   } else if(index>len) {
     848           0 :     index=len;
     849             :   }
     850             : 
     851           0 :   const UChar *array = getArrayStart();
     852           0 :   if(delta>0) {
     853           0 :     U16_FWD_N(array, index, len, delta);
     854             :   } else {
     855           0 :     U16_BACK_N(array, 0, index, -delta);
     856             :   }
     857             : 
     858           0 :   return index;
     859             : }
     860             : 
     861             : void
     862           0 : UnicodeString::doExtract(int32_t start,
     863             :              int32_t length,
     864             :              UChar *dst,
     865             :              int32_t dstStart) const
     866             : {
     867             :   // pin indices to legal values
     868           0 :   pinIndices(start, length);
     869             : 
     870             :   // do not copy anything if we alias dst itself
     871           0 :   const UChar *array = getArrayStart();
     872           0 :   if(array + start != dst + dstStart) {
     873           0 :     us_arrayCopy(array, start, dst, dstStart, length);
     874             :   }
     875           0 : }
     876             : 
     877             : int32_t
     878           0 : UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
     879             :                        UErrorCode &errorCode) const {
     880           0 :   int32_t len = length();
     881           0 :   if(U_SUCCESS(errorCode)) {
     882           0 :     if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
     883           0 :       errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     884             :     } else {
     885           0 :       const UChar *array = getArrayStart();
     886           0 :       if(len>0 && len<=destCapacity && array!=dest) {
     887           0 :         u_memcpy(dest, array, len);
     888             :       }
     889           0 :       return u_terminateUChars(dest, destCapacity, len, &errorCode);
     890             :     }
     891             :   }
     892             : 
     893           0 :   return len;
     894             : }
     895             : 
     896             : int32_t
     897           0 : UnicodeString::extract(int32_t start,
     898             :                        int32_t length,
     899             :                        char *target,
     900             :                        int32_t targetCapacity,
     901             :                        enum EInvariant) const
     902             : {
     903             :   // if the arguments are illegal, then do nothing
     904           0 :   if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
     905           0 :     return 0;
     906             :   }
     907             : 
     908             :   // pin the indices to legal values
     909           0 :   pinIndices(start, length);
     910             : 
     911           0 :   if(length <= targetCapacity) {
     912           0 :     u_UCharsToChars(getArrayStart() + start, target, length);
     913             :   }
     914           0 :   UErrorCode status = U_ZERO_ERROR;
     915           0 :   return u_terminateChars(target, targetCapacity, length, &status);
     916             : }
     917             : 
     918             : UnicodeString
     919           0 : UnicodeString::tempSubString(int32_t start, int32_t len) const {
     920           0 :   pinIndices(start, len);
     921           0 :   const UChar *array = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
     922           0 :   if(array==NULL) {
     923           0 :     array=fUnion.fStackFields.fBuffer;  // anything not NULL because that would make an empty string
     924           0 :     len=-2;  // bogus result string
     925             :   }
     926           0 :   return UnicodeString(FALSE, array + start, len);
     927             : }
     928             : 
     929             : int32_t
     930           0 : UnicodeString::toUTF8(int32_t start, int32_t len,
     931             :                       char *target, int32_t capacity) const {
     932           0 :   pinIndices(start, len);
     933             :   int32_t length8;
     934           0 :   UErrorCode errorCode = U_ZERO_ERROR;
     935           0 :   u_strToUTF8WithSub(target, capacity, &length8,
     936           0 :                      getBuffer() + start, len,
     937             :                      0xFFFD,  // Standard substitution character.
     938             :                      NULL,    // Don't care about number of substitutions.
     939           0 :                      &errorCode);
     940           0 :   return length8;
     941             : }
     942             : 
     943             : #if U_CHARSET_IS_UTF8
     944             : 
     945             : int32_t
     946           0 : UnicodeString::extract(int32_t start, int32_t len,
     947             :                        char *target, uint32_t dstSize) const {
     948             :   // if the arguments are illegal, then do nothing
     949           0 :   if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
     950           0 :     return 0;
     951             :   }
     952           0 :   return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
     953             : }
     954             : 
     955             : // else see unistr_cnv.cpp
     956             : #endif
     957             : 
     958             : void 
     959           0 : UnicodeString::extractBetween(int32_t start,
     960             :                   int32_t limit,
     961             :                   UnicodeString& target) const {
     962           0 :   pinIndex(start);
     963           0 :   pinIndex(limit);
     964           0 :   doExtract(start, limit - start, target);
     965           0 : }
     966             : 
     967             : // When converting from UTF-16 to UTF-8, the result will have at most 3 times
     968             : // as many bytes as the source has UChars.
     969             : // The "worst cases" are writing systems like Indic, Thai and CJK with
     970             : // 3:1 bytes:UChars.
     971             : void
     972           0 : UnicodeString::toUTF8(ByteSink &sink) const {
     973           0 :   int32_t length16 = length();
     974           0 :   if(length16 != 0) {
     975             :     char stackBuffer[1024];
     976           0 :     int32_t capacity = (int32_t)sizeof(stackBuffer);
     977           0 :     UBool utf8IsOwned = FALSE;
     978           0 :     char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
     979             :                                       3*length16,
     980             :                                       stackBuffer, capacity,
     981           0 :                                       &capacity);
     982           0 :     int32_t length8 = 0;
     983           0 :     UErrorCode errorCode = U_ZERO_ERROR;
     984           0 :     u_strToUTF8WithSub(utf8, capacity, &length8,
     985             :                        getBuffer(), length16,
     986             :                        0xFFFD,  // Standard substitution character.
     987             :                        NULL,    // Don't care about number of substitutions.
     988           0 :                        &errorCode);
     989           0 :     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
     990           0 :       utf8 = (char *)uprv_malloc(length8);
     991           0 :       if(utf8 != NULL) {
     992           0 :         utf8IsOwned = TRUE;
     993           0 :         errorCode = U_ZERO_ERROR;
     994           0 :         u_strToUTF8WithSub(utf8, length8, &length8,
     995             :                            getBuffer(), length16,
     996             :                            0xFFFD,  // Standard substitution character.
     997             :                            NULL,    // Don't care about number of substitutions.
     998           0 :                            &errorCode);
     999             :       } else {
    1000           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
    1001             :       }
    1002             :     }
    1003           0 :     if(U_SUCCESS(errorCode)) {
    1004           0 :       sink.Append(utf8, length8);
    1005           0 :       sink.Flush();
    1006             :     }
    1007           0 :     if(utf8IsOwned) {
    1008           0 :       uprv_free(utf8);
    1009             :     }
    1010             :   }
    1011           0 : }
    1012             : 
    1013             : int32_t
    1014           0 : UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
    1015           0 :   int32_t length32=0;
    1016           0 :   if(U_SUCCESS(errorCode)) {
    1017             :     // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
    1018           0 :     u_strToUTF32WithSub(utf32, capacity, &length32,
    1019             :         getBuffer(), length(),
    1020             :         0xfffd,  // Substitution character.
    1021             :         NULL,    // Don't care about number of substitutions.
    1022           0 :         &errorCode);
    1023             :   }
    1024           0 :   return length32;
    1025             : }
    1026             : 
    1027             : int32_t 
    1028           0 : UnicodeString::indexOf(const UChar *srcChars,
    1029             :                int32_t srcStart,
    1030             :                int32_t srcLength,
    1031             :                int32_t start,
    1032             :                int32_t length) const
    1033             : {
    1034           0 :   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
    1035           0 :     return -1;
    1036             :   }
    1037             : 
    1038             :   // UnicodeString does not find empty substrings
    1039           0 :   if(srcLength < 0 && srcChars[srcStart] == 0) {
    1040           0 :     return -1;
    1041             :   }
    1042             : 
    1043             :   // get the indices within bounds
    1044           0 :   pinIndices(start, length);
    1045             : 
    1046             :   // find the first occurrence of the substring
    1047           0 :   const UChar *array = getArrayStart();
    1048           0 :   const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
    1049           0 :   if(match == NULL) {
    1050           0 :     return -1;
    1051             :   } else {
    1052           0 :     return (int32_t)(match - array);
    1053             :   }
    1054             : }
    1055             : 
    1056             : int32_t
    1057           0 : UnicodeString::doIndexOf(UChar c,
    1058             :              int32_t start,
    1059             :              int32_t length) const
    1060             : {
    1061             :   // pin indices
    1062           0 :   pinIndices(start, length);
    1063             : 
    1064             :   // find the first occurrence of c
    1065           0 :   const UChar *array = getArrayStart();
    1066           0 :   const UChar *match = u_memchr(array + start, c, length);
    1067           0 :   if(match == NULL) {
    1068           0 :     return -1;
    1069             :   } else {
    1070           0 :     return (int32_t)(match - array);
    1071             :   }
    1072             : }
    1073             : 
    1074             : int32_t
    1075           0 : UnicodeString::doIndexOf(UChar32 c,
    1076             :                          int32_t start,
    1077             :                          int32_t length) const {
    1078             :   // pin indices
    1079           0 :   pinIndices(start, length);
    1080             : 
    1081             :   // find the first occurrence of c
    1082           0 :   const UChar *array = getArrayStart();
    1083           0 :   const UChar *match = u_memchr32(array + start, c, length);
    1084           0 :   if(match == NULL) {
    1085           0 :     return -1;
    1086             :   } else {
    1087           0 :     return (int32_t)(match - array);
    1088             :   }
    1089             : }
    1090             : 
    1091             : int32_t 
    1092           0 : UnicodeString::lastIndexOf(const UChar *srcChars,
    1093             :                int32_t srcStart,
    1094             :                int32_t srcLength,
    1095             :                int32_t start,
    1096             :                int32_t length) const
    1097             : {
    1098           0 :   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
    1099           0 :     return -1;
    1100             :   }
    1101             : 
    1102             :   // UnicodeString does not find empty substrings
    1103           0 :   if(srcLength < 0 && srcChars[srcStart] == 0) {
    1104           0 :     return -1;
    1105             :   }
    1106             : 
    1107             :   // get the indices within bounds
    1108           0 :   pinIndices(start, length);
    1109             : 
    1110             :   // find the last occurrence of the substring
    1111           0 :   const UChar *array = getArrayStart();
    1112           0 :   const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
    1113           0 :   if(match == NULL) {
    1114           0 :     return -1;
    1115             :   } else {
    1116           0 :     return (int32_t)(match - array);
    1117             :   }
    1118             : }
    1119             : 
    1120             : int32_t
    1121           0 : UnicodeString::doLastIndexOf(UChar c,
    1122             :                  int32_t start,
    1123             :                  int32_t length) const
    1124             : {
    1125           0 :   if(isBogus()) {
    1126           0 :     return -1;
    1127             :   }
    1128             : 
    1129             :   // pin indices
    1130           0 :   pinIndices(start, length);
    1131             : 
    1132             :   // find the last occurrence of c
    1133           0 :   const UChar *array = getArrayStart();
    1134           0 :   const UChar *match = u_memrchr(array + start, c, length);
    1135           0 :   if(match == NULL) {
    1136           0 :     return -1;
    1137             :   } else {
    1138           0 :     return (int32_t)(match - array);
    1139             :   }
    1140             : }
    1141             : 
    1142             : int32_t
    1143           0 : UnicodeString::doLastIndexOf(UChar32 c,
    1144             :                              int32_t start,
    1145             :                              int32_t length) const {
    1146             :   // pin indices
    1147           0 :   pinIndices(start, length);
    1148             : 
    1149             :   // find the last occurrence of c
    1150           0 :   const UChar *array = getArrayStart();
    1151           0 :   const UChar *match = u_memrchr32(array + start, c, length);
    1152           0 :   if(match == NULL) {
    1153           0 :     return -1;
    1154             :   } else {
    1155           0 :     return (int32_t)(match - array);
    1156             :   }
    1157             : }
    1158             : 
    1159             : //========================================
    1160             : // Write implementation
    1161             : //========================================
    1162             : 
    1163             : UnicodeString& 
    1164           0 : UnicodeString::findAndReplace(int32_t start,
    1165             :                   int32_t length,
    1166             :                   const UnicodeString& oldText,
    1167             :                   int32_t oldStart,
    1168             :                   int32_t oldLength,
    1169             :                   const UnicodeString& newText,
    1170             :                   int32_t newStart,
    1171             :                   int32_t newLength)
    1172             : {
    1173           0 :   if(isBogus() || oldText.isBogus() || newText.isBogus()) {
    1174           0 :     return *this;
    1175             :   }
    1176             : 
    1177           0 :   pinIndices(start, length);
    1178           0 :   oldText.pinIndices(oldStart, oldLength);
    1179           0 :   newText.pinIndices(newStart, newLength);
    1180             : 
    1181           0 :   if(oldLength == 0) {
    1182           0 :     return *this;
    1183             :   }
    1184             : 
    1185           0 :   while(length > 0 && length >= oldLength) {
    1186           0 :     int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
    1187           0 :     if(pos < 0) {
    1188             :       // no more oldText's here: done
    1189           0 :       break;
    1190             :     } else {
    1191             :       // we found oldText, replace it by newText and go beyond it
    1192           0 :       replace(pos, oldLength, newText, newStart, newLength);
    1193           0 :       length -= pos + oldLength - start;
    1194           0 :       start = pos + newLength;
    1195             :     }
    1196             :   }
    1197             : 
    1198           0 :   return *this;
    1199             : }
    1200             : 
    1201             : 
    1202             : void
    1203           0 : UnicodeString::setToBogus()
    1204             : {
    1205           0 :   releaseArray();
    1206             : 
    1207           0 :   fUnion.fFields.fLengthAndFlags = kIsBogus;
    1208           0 :   fUnion.fFields.fArray = 0;
    1209           0 :   fUnion.fFields.fCapacity = 0;
    1210           0 : }
    1211             : 
    1212             : // turn a bogus string into an empty one
    1213             : void
    1214           0 : UnicodeString::unBogus() {
    1215           0 :   if(fUnion.fFields.fLengthAndFlags & kIsBogus) {
    1216           0 :     setToEmpty();
    1217             :   }
    1218           0 : }
    1219             : 
    1220             : const char16_t *  // returns this string's buffer with a NUL at index length(), or nullptr on failure
    1221           0 : UnicodeString::getTerminatedBuffer() {
    1222           0 :   if(!isWritable()) {  // a non-writable string yields no buffer at all
    1223           0 :     return nullptr;
    1224             :   }
    1225           0 :   UChar *array = getArrayStart();
    1226           0 :   int32_t len = length();
    1227           0 :   if(len < getCapacity()) {  // the current buffer has a slot at array[len]
    1228           0 :     if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
    1229             :       // If len<capacity on a read-only alias, then array[len] is
    1230             :       // either the original NUL (if constructed with (TRUE, s, length))
    1231             :       // or one of the original string contents characters (if later truncated),
    1232             :       // therefore we can assume that array[len] is initialized memory.
    1233           0 :       if(array[len] == 0) {  // already terminated: return without writing or copying
    1234           0 :         return array;
    1235             :       }
    1236           0 :     } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) {  // buffer is not shared with another string
    1237             :       // kRefCounted: Do not write the NUL if the buffer is shared.
    1238             :       // That is mostly safe, except when the length of one copy was modified
    1239             :       // without copy-on-write, e.g., via truncate(newLength) or remove(void).
    1240             :       // Then the NUL would be written into the middle of another copy's string.
    1241             : 
    1242             :       // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
    1243             :       // Do not test if there is a NUL already because it might be uninitialized memory.
    1244             :       // (That would be safe, but tools like valgrind & Purify would complain.)
    1245           0 :       array[len] = 0;
    1246           0 :       return array;
    1247             :     }
    1248             :   }
    1249           0 :   if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {  // fallback: obtain a buffer with room for the NUL; the len check keeps len+1 from overflowing
    1250           0 :     array = getArrayStart();  // re-read the pointer: cloneArrayIfNeeded() may have switched buffers
    1251           0 :     array[len] = 0;
    1252           0 :     return array;
    1253             :   } else {
    1254           0 :     return nullptr;  // length is INT32_MAX or cloneArrayIfNeeded() returned FALSE
    1255             :   }
    1256             : }
    1257             : 
    1258             : // setTo() analogous to the readonly-aliasing constructor with the same signature; always returns *this
    1259             : UnicodeString &
    1260           0 : UnicodeString::setTo(UBool isTerminated,  // caller's claim that text is NUL-terminated
    1261             :                      ConstChar16Ptr textPtr,  // text to alias; NULL is treated as an empty string
    1262             :                      int32_t textLength)  // length in code units, or -1 to measure with u_strlen()
    1263             : {
    1264           0 :   if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
    1265             :     // do not modify a string that has an "open" getBuffer(minCapacity)
    1266           0 :     return *this;
    1267             :   }
    1268             : 
    1269           0 :   const UChar *text = textPtr;
    1270           0 :   if(text == NULL) {
    1271             :     // treat as an empty string, do not alias
    1272           0 :     releaseArray();
    1273           0 :     setToEmpty();
    1274           0 :     return *this;
    1275             :   }
    1276             : 
    1277           0 :   if( textLength < -1 ||  // lengths below -1 are rejected
    1278           0 :       (textLength == -1 && !isTerminated) ||  // -1 can only be measured on terminated text
    1279           0 :       (textLength >= 0 && isTerminated && text[textLength] != 0)  // claimed terminator is not actually a NUL
    1280             :   ) {
    1281           0 :     setToBogus();  // inconsistent arguments make the string bogus
    1282           0 :     return *this;
    1283             :   }
    1284             : 
    1285           0 :   releaseArray();
    1286             : 
    1287           0 :   if(textLength == -1) {
    1288             :     // text is terminated, or else it would have failed the above test
    1289           0 :     textLength = u_strlen(text);
    1290             :   }
    1291           0 :   fUnion.fFields.fLengthAndFlags = kReadonlyAlias;  // replaces all previous flag bits
    1292           0 :   setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);  // capacity counts the NUL only when terminated; the cast drops const for storage
    1293           0 :   return *this;
    1294             : }
    1295             : 
    1296             : // setTo() analogous to the writable-aliasing constructor with the same signature; always returns *this
    1297             : UnicodeString &
    1298           0 : UnicodeString::setTo(UChar *buffer,  // caller buffer to alias; NULL is treated as an empty string
    1299             :                      int32_t buffLength,  // initial length, or -1 to scan for a NUL within buffCapacity
    1300             :                      int32_t buffCapacity) {  // size of buffer in code units; must be >= 0 and >= buffLength
    1301           0 :   if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
    1302             :     // do not modify a string that has an "open" getBuffer(minCapacity)
    1303           0 :     return *this;
    1304             :   }
    1305             : 
    1306           0 :   if(buffer == NULL) {
    1307             :     // treat as an empty string, do not alias
    1308           0 :     releaseArray();
    1309           0 :     setToEmpty();
    1310           0 :     return *this;
    1311             :   }
    1312             : 
    1313           0 :   if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {  // inconsistent arguments make the string bogus
    1314           0 :     setToBogus();
    1315           0 :     return *this;
    1316           0 :   } else if(buffLength == -1) {
    1317             :     // buffLength = u_strlen(buff); but do not look beyond buffCapacity
    1318           0 :     const UChar *p = buffer, *limit = buffer + buffCapacity;
    1319           0 :     while(p != limit && *p != 0) {  // stops at the first NUL or at the end of the buffer, whichever comes first
    1320           0 :       ++p;
    1321             :     }
    1322           0 :     buffLength = (int32_t)(p - buffer);  // equals buffCapacity when no NUL was found
    1323             :   }
    1324             : 
    1325           0 :   releaseArray();
    1326             : 
    1327           0 :   fUnion.fFields.fLengthAndFlags = kWritableAlias;  // replaces all previous flag bits
    1328           0 :   setArray(buffer, buffLength, buffCapacity);
    1329           0 :   return *this;
    1330             : }
    1331             : 
    1332           0 : UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {  // replaces the contents with utf8 converted via u_strFromUTF8WithSub(); returns *this
    1333           0 :   unBogus();  // a bogus string is first reset to empty so that it can be filled
    1334           0 :   int32_t length = utf8.length();
    1335             :   int32_t capacity;
    1336             :   // The UTF-16 string will be at most as long as the UTF-8 string.
    1337           0 :   if(length <= US_STACKBUF_SIZE) {
    1338           0 :     capacity = US_STACKBUF_SIZE;  // small input: request no more than the stack-buffer size
    1339             :   } else {
    1340           0 :     capacity = length + 1;  // +1 for the terminating NUL.
    1341             :   }
    1342           0 :   UChar *utf16 = getBuffer(capacity);  // NOTE(review): getBuffer() can return nullptr and the result is not checked here — confirm the call below tolerates a null destination
    1343             :   int32_t length16;  // NOTE(review): not initialized here; presumably always written by u_strFromUTF8WithSub() — confirm for the failure path
    1344           0 :   UErrorCode errorCode = U_ZERO_ERROR;
    1345           0 :   u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
    1346             :       utf8.data(), length,
    1347             :       0xfffd,  // Substitution character.
    1348             :       NULL,    // Don't care about number of substitutions.
    1349           0 :       &errorCode);
    1350           0 :   releaseBuffer(length16);  // closes the getBuffer() above and sets the new length
    1351           0 :   if(U_FAILURE(errorCode)) {
    1352           0 :     setToBogus();  // a conversion failure leaves the string bogus
    1353             :   }
    1354           0 :   return *this;
    1355             : }
    1356             : 
    1357             : UnicodeString&  // returns *this
    1358           0 : UnicodeString::setCharAt(int32_t offset,  // index to overwrite; out-of-range values are clamped, not rejected
    1359             :              UChar c)  // code unit to store
    1360             : {
    1361           0 :   int32_t len = length();
    1362           0 :   if(cloneArrayIfNeeded() && len > 0) {  // nothing is written if the buffer cannot be made writable or the string is empty
    1363           0 :     if(offset < 0) {
    1364           0 :       offset = 0;  // negative offsets address the first code unit
    1365           0 :     } else if(offset >= len) {
    1366           0 :       offset = len - 1;  // offsets past the end address the last code unit
    1367             :     }
    1368             : 
    1369           0 :     getArrayStart()[offset] = c;
    1370             :   }
    1371           0 :   return *this;
    1372             : }
    1373             : 
    1374             : UnicodeString&  // returns the result of doReplace()
    1375           0 : UnicodeString::replace(int32_t start,  // start of the range to replace
    1376             :                int32_t _length,  // length of the range to replace
    1377             :                UChar32 srcChar) {  // code point to insert in place of the range
    1378             :   UChar buffer[U16_MAX_LENGTH];  // holds the UTF-16 form of srcChar
    1379           0 :   int32_t count = 0;  // number of code units written to buffer by U16_APPEND
    1380           0 :   UBool isError = FALSE;
    1381           0 :   U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
    1382             :   // We test isError so that the compiler does not complain that we don't.
    1383             :   // If isError (srcChar is not a valid code point) then count==0 which means
    1384             :   // we remove the source segment rather than replacing it with srcChar.
    1385           0 :   return doReplace(start, _length, buffer, 0, isError ? 0 : count);
    1386             : }
    1387             : 
    1388             : UnicodeString&  // returns *this, directly or via doAppend()
    1389           0 : UnicodeString::append(UChar32 srcChar) {  // appends one code point encoded as UTF-16
    1390             :   UChar buffer[U16_MAX_LENGTH];  // holds the UTF-16 form of srcChar
    1391           0 :   int32_t _length = 0;  // number of code units written to buffer by U16_APPEND
    1392           0 :   UBool isError = FALSE;
    1393           0 :   U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
    1394             :   // We test isError so that the compiler does not complain that we don't.
    1395             :   // If isError then _length==0 which turns the doAppend() into a no-op anyway.
    1396           0 :   return isError ? *this : doAppend(buffer, 0, _length);
    1397             : }
    1398             : 
    1399             : UnicodeString&  // returns the result of the UChar* overload of doReplace()
    1400           0 : UnicodeString::doReplace( int32_t start,  // start of the range to replace in this string
    1401             :               int32_t length,  // length of the range to replace in this string
    1402             :               const UnicodeString& src,  // string supplying the replacement text
    1403             :               int32_t srcStart,  // start of the replacement range within src
    1404             :               int32_t srcLength)  // length of the replacement range within src
    1405             : {
    1406             :   // pin the indices to legal values
    1407           0 :   src.pinIndices(srcStart, srcLength);  // pinned against src, not against this string
    1408             : 
    1409             :   // get the characters from src
    1410             :   // and replace the range in ourselves with them
    1411           0 :   return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
    1412             : }
    1413             : 
    1414             : UnicodeString&  // returns *this, directly or via doAppend()
    1415           0 : UnicodeString::doReplace(int32_t start,  // first code unit of the range to replace
    1416             :              int32_t length,  // number of code units to replace
    1417             :              const UChar *srcChars,  // replacement text; a null pointer is treated as empty further down
    1418             :              int32_t srcStart,  // offset into srcChars
    1419             :              int32_t srcLength)  // replacement length; a negative value is measured with u_strlen()
    1420             : {
    1421           0 :   if(!isWritable()) {  // a non-writable string is returned unmodified
    1422           0 :     return *this;
    1423             :   }
    1424             : 
    1425           0 :   int32_t oldLength = this->length();
    1426             : 
    1427             :   // optimize (read-only alias).remove(0, start) and .remove(start, end)
    1428           0 :   if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) {  // pure removal on a read-only buffer: adjust pointer/length instead of copying
    1429           0 :     if(start == 0) {
    1430             :       // remove prefix by adjusting the array pointer
    1431           0 :       pinIndex(length);  // presumably clamps length into the valid index range — confirm against pinIndex()
    1432           0 :       fUnion.fFields.fArray += length;
    1433           0 :       fUnion.fFields.fCapacity -= length;  // capacity shrinks by the amount the pointer advanced
    1434           0 :       setLength(oldLength - length);
    1435           0 :       return *this;
    1436             :     } else {
    1437           0 :       pinIndex(start);
    1438           0 :       if(length >= (oldLength - start)) {  // the removal reaches the end of the string
    1439             :         // remove suffix by reducing the length (like truncate())
    1440           0 :         setLength(start);
    1441           0 :         fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
    1442           0 :         return *this;
    1443             :       }
    1444             :     }
    1445             :   }
    1446             : 
    1447           0 :   if(start == oldLength) {  // replacing at the very end is handed to doAppend()
    1448           0 :     return doAppend(srcChars, srcStart, srcLength);
    1449             :   }
    1450             : 
    1451           0 :   if(srcChars == 0) {
    1452           0 :     srcStart = srcLength = 0;  // no source text: the operation becomes a plain removal
    1453           0 :   } else if(srcLength < 0) {
    1454             :     // get the srcLength if necessary
    1455           0 :     srcLength = u_strlen(srcChars + srcStart);
    1456             :   }
    1457             : 
    1458             :   // pin the indices to legal values
    1459           0 :   pinIndices(start, length);
    1460             : 
    1461             :   // Calculate the size of the string after the replace.
    1462             :   // Avoid int32_t overflow.
    1463           0 :   int32_t newLength = oldLength - length;  // what remains after removing the range
    1464           0 :   if(srcLength > (INT32_MAX - newLength)) {  // newLength + srcLength would exceed INT32_MAX
    1465           0 :     setToBogus();
    1466           0 :     return *this;
    1467             :   }
    1468           0 :   newLength += srcLength;
    1469             : 
    1470             :   // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
    1471             :   // therefore we need to keep the current fArray
    1472             :   UChar oldStackBuffer[US_STACKBUF_SIZE];
    1473             :   UChar *oldArray;
    1474           0 :   if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {  // contents sit in the stack buffer and the result will not fit there
    1475             :     // copy the stack buffer contents because it will be overwritten with
    1476             :     // fUnion.fFields values
    1477           0 :     u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength);
    1478           0 :     oldArray = oldStackBuffer;
    1479             :   } else {
    1480           0 :     oldArray = getArrayStart();
    1481             :   }
    1482             : 
    1483             :   // clone our array and allocate a bigger array if needed
    1484           0 :   int32_t *bufferToDelete = 0;  // receives the old ref-counted block so it can be freed after the copies below
    1485           0 :   if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
    1486             :                          FALSE, &bufferToDelete)  // FALSE = doCopyArray off: the contents are copied by hand below
    1487             :   ) {
    1488           0 :     return *this;  // cloneArrayIfNeeded() returned FALSE (it sets the string to bogus when allocation fails)
    1489             :   }
    1490             : 
    1491             :   // now do the replace
    1492             : 
    1493           0 :   UChar *newArray = getArrayStart();
    1494           0 :   if(newArray != oldArray) {
    1495             :     // if fArray changed, then we need to copy everything except what will change
    1496           0 :     us_arrayCopy(oldArray, 0, newArray, 0, start);  // unchanged prefix
    1497           0 :     us_arrayCopy(oldArray, start + length,  // unchanged suffix ...
    1498             :                  newArray, start + srcLength,  // ... placed right after the replacement
    1499           0 :                  oldLength - (start + length));
    1500           0 :   } else if(length != srcLength) {
    1501             :     // fArray did not change; copy only the portion that isn't changing, leaving a hole
    1502           0 :     us_arrayCopy(oldArray, start + length,  // source and destination are the same buffer here; presumably us_arrayCopy() is overlap-safe — confirm
    1503             :                  newArray, start + srcLength,
    1504           0 :                  oldLength - (start + length));
    1505             :   }
    1506             : 
    1507             :   // now fill in the hole with the new string
    1508           0 :   us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
    1509             : 
    1510           0 :   setLength(newLength);
    1511             : 
    1512             :   // delayed delete in case srcChars == fArray when we started, and
    1513             :   // to keep oldArray alive for the above operations
    1514           0 :   if (bufferToDelete) {
    1515           0 :     uprv_free(bufferToDelete);
    1516             :   }
    1517             : 
    1518           0 :   return *this;
    1519             : }
    1520             : 
    1521             : // Versions of doReplace() only for append() variants.
    1522             : // doReplace() and doAppend() optimize for different cases.
    1523             : 
    1524             : UnicodeString&  // returns *this, directly or via the UChar* overload
    1525           0 : UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {  // appends the range [srcStart, srcStart+srcLength) of src
    1526           0 :   if(srcLength == 0) {  // nothing to append
    1527           0 :     return *this;
    1528             :   }
    1529             : 
    1530             :   // pin the indices to legal values
    1531           0 :   src.pinIndices(srcStart, srcLength);  // pinned against src, not against this string
    1532           0 :   return doAppend(src.getArrayStart(), srcStart, srcLength);
    1533             : }
    1534             : 
    1535             : UnicodeString&  // returns *this in every path
    1536           0 : UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength) {  // appends srcLength code units from srcChars+srcStart; a negative srcLength is measured with u_strlen()
    1537           0 :   if(!isWritable() || srcLength == 0 || srcChars == NULL) {  // non-writable target or nothing to append: no change
    1538           0 :     return *this;
    1539             :   }
    1540             : 
    1541           0 :   if(srcLength < 0) {
    1542             :     // get the srcLength if necessary
    1543           0 :     if((srcLength = u_strlen(srcChars + srcStart)) == 0) {
    1544           0 :       return *this;
    1545             :     }
    1546             :   }
    1547             : 
    1548           0 :   int32_t oldLength = length();
    1549           0 :   int32_t newLength = oldLength + srcLength;  // NOTE(review): unlike doReplace(), this sum is not checked against INT32_MAX — looks like it can overflow for very long inputs; confirm
    1550             :   // optimize append() onto a large-enough, owned string
    1551           0 :   if((newLength <= getCapacity() && isBufferWritable()) ||
    1552           0 :       cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {  // NOTE(review): no pBufferToDelete is passed; if srcChars points into this string's own ref-counted buffer, a reallocation here may free it before the copy below — confirm
    1553           0 :     UChar *newArray = getArrayStart();
    1554             :     // Do not copy characters when
    1555             :     //   UChar *buffer=str.getAppendBuffer(...);
    1556             :     // is followed by
    1557             :     //   str.append(buffer, length);
    1558             :     // or
    1559             :     //   str.appendString(buffer, length)
    1560             :     // or similar.
    1561           0 :     if(srcChars + srcStart != newArray + oldLength) {  // the source already sits exactly at the append position: skip the copy
    1562           0 :       us_arrayCopy(srcChars, srcStart, newArray, oldLength, srcLength);
    1563             :     }
    1564           0 :     setLength(newLength);
    1565             :   }
    1566           0 :   return *this;
    1567             : }
    1568             : 
    1569             : /**
    1570             :  * Replaceable API: forwards unchanged to replaceBetween(start, limit, text).
    1571             :  */
    1572             : void
    1573           0 : UnicodeString::handleReplaceBetween(int32_t start,
    1574             :                                     int32_t limit,
    1575             :                                     const UnicodeString& text) {
    1576           0 :     replaceBetween(start, limit, text);
    1577           0 : }
    1578             : 
    1579             : /**
    1580             :  * Replaceable API: extracts [start, limit) into a temporary heap buffer and inserts it at dest.
    1581             :  */
    1582             : void 
    1583           0 : UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
    1584           0 :     if (limit <= start) {
    1585           0 :         return; // Nothing to do; avoid bogus malloc call
    1586             :     }
    1587           0 :     UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );  // NOTE(review): start/limit are not pinned to length() before sizing this buffer — confirm callers pass valid indices
    1588             :     // Check to make sure text is not null.
    1589           0 :     if (text != NULL) {  // on allocation failure the function silently does nothing
    1590           0 :             extractBetween(start, limit, text, 0);  // NOTE(review): confirm this fills all limit-start units that insert() reads next
    1591           0 :             insert(dest, text, 0, limit - start);    // inserts the extracted units at dest
    1592           0 :             uprv_free(text);
    1593             :     }
    1594             : }
    1595             : 
    1596             : /**
    1597             :  * Replaceable API: this base-class implementation always returns TRUE.
    1598             :  *
    1599             :  * NOTE: This is for the Replaceable class.  There is no rep.cpp,
    1600             :  * so we implement this function here.
    1601             :  */
    1602           0 : UBool Replaceable::hasMetaData() const {
    1603           0 :     return TRUE;
    1604             : }
    1605             : 
    1606             : /**
    1607             :  * Replaceable API: the UnicodeString implementation always returns FALSE.
    1608             :  */
    1609           0 : UBool UnicodeString::hasMetaData() const {
    1610           0 :     return FALSE;
    1611             : }
    1612             : 
    1613             : UnicodeString&  // returns *this
    1614           0 : UnicodeString::doReverse(int32_t start, int32_t length) {  // reverses the range in place, keeping lead/trail surrogate pairs in lead-trail order
    1615           0 :   if(length <= 1 || !cloneArrayIfNeeded()) {  // nothing to reverse, or the buffer could not be made writable
    1616           0 :     return *this;
    1617             :   }
    1618             : 
    1619             :   // pin the indices to legal values
    1620           0 :   pinIndices(start, length);
    1621           0 :   if(length <= 1) {  // pinIndices() might have shrunk the length
    1622           0 :     return *this;
    1623             :   }
    1624             : 
    1625           0 :   UChar *left = getArrayStart() + start;
    1626           0 :   UChar *right = left + length - 1;  // -1 for inclusive boundary (length>=2)
    1627             :   UChar swap;
    1628           0 :   UBool hasSupplementary = FALSE;  // set once any lead surrogate is seen during the first pass
    1629             : 
    1630             :   // Before the loop we know left<right because length>=2.
    1631           0 :   do {
    1632           0 :     hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);  // saves *left in swap and tests it
    1633           0 :     hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);  // moves *right to the left slot and tests it
    1634           0 :     *right-- = swap;  // completes the exchange
    1635           0 :   } while(left < right);
    1636             :   // Make sure to test the middle code unit of an odd-length string.
    1637             :   // Redundant if the length is even.
    1638           0 :   hasSupplementary |= (UBool)U16_IS_LEAD(*left);
    1639             : 
    1640             :   /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
    1641           0 :   if(hasSupplementary) {
    1642             :     UChar swap2;
    1643             : 
    1644           0 :     left = getArrayStart() + start;
    1645           0 :     right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
    1646           0 :     while(left < right) {
    1647           0 :       if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {  // a trail followed by a lead is a pair that the first pass flipped
    1648           0 :         *left++ = swap2;
    1649           0 :         *left++ = swap;
    1650             :       } else {
    1651           0 :         ++left;
    1652             :       }
    1653             :     }
    1654             :   }
    1655             : 
    1656           0 :   return *this;
    1657             : }
    1658             : 
    1659             : UBool  // TRUE if padding was added; FALSE if the string was already long enough or the buffer could not be grown
    1660           0 : UnicodeString::padLeading(int32_t targetLength,  // desired total length in code units
    1661             :                           UChar padChar)  // code unit written in front of the existing contents
    1662             : {
    1663           0 :   int32_t oldLength = length();
    1664           0 :   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
    1665           0 :     return FALSE;
    1666             :   } else {
    1667             :     // move contents up by padding width
    1668           0 :     UChar *array = getArrayStart();
    1669           0 :     int32_t start = targetLength - oldLength;  // padding width, and the index where the old contents will begin
    1670           0 :     us_arrayCopy(array, 0, array, start, oldLength);  // source and destination are the same buffer; presumably us_arrayCopy() is overlap-safe — confirm
    1671             : 
    1672             :     // fill in padding character
    1673           0 :     while(--start >= 0) {  // fills indexes start-1 down to 0
    1674           0 :       array[start] = padChar;
    1675             :     }
    1676           0 :     setLength(targetLength);
    1677           0 :     return TRUE;
    1678             :   }
    1679             : }
    1680             : 
    1681             : UBool  // TRUE if padding was added; FALSE if the string was already long enough or the buffer could not be grown
    1682           0 : UnicodeString::padTrailing(int32_t targetLength,  // desired total length in code units
    1683             :                            UChar padChar)  // code unit written after the existing contents
    1684             : {
    1685           0 :   int32_t oldLength = length();
    1686           0 :   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
    1687           0 :     return FALSE;
    1688             :   } else {
    1689             :     // fill in padding character
    1690           0 :     UChar *array = getArrayStart();
    1691           0 :     int32_t length = targetLength;
    1692           0 :     while(--length >= oldLength) {  // fills indexes targetLength-1 down to oldLength
    1693           0 :       array[length] = padChar;
    1694             :     }
    1695           0 :     setLength(targetLength);
    1696           0 :     return TRUE;
    1697             :   }
    1698             : }
    1699             : 
    1700             : //========================================
    1701             : // Hashing
    1702             : //========================================
    1703             : int32_t  // hash of the current contents; never equal to kInvalidHashCode
    1704           0 : UnicodeString::doHashCode() const
    1705             : {
    1706             :     /* Delegate hash computation to uhash.  This makes UnicodeString
    1707             :      * hashing consistent with UChar* hashing.  */
    1708           0 :     int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
    1709           0 :     if (hashCode == kInvalidHashCode) {  // the sentinel value is remapped so that it is never returned
    1710           0 :         hashCode = kEmptyHashCode;
    1711             :     }
    1712           0 :     return hashCode;
    1713             : }
    1714             : 
    1715             : //========================================
    1716             : // External Buffer
    1717             : //========================================
    1718             : 
    1719             : char16_t *  // writable buffer of at least minCapacity units, or nullptr if the request is invalid or cannot be satisfied
    1720           0 : UnicodeString::getBuffer(int32_t minCapacity) {  // -1 is passed through to cloneArrayIfNeeded(), which maps it to the current capacity
    1721           0 :   if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
    1722           0 :     fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer;  // marks the buffer as "open" until releaseBuffer() clears the flag
    1723           0 :     setZeroLength();  // the length is reset to zero while the buffer is open
    1724           0 :     return getArrayStart();
    1725             :   } else {
    1726           0 :     return nullptr;
    1727             :   }
    1728             : }
    1729             : 
    1730             : void
    1731           0 : UnicodeString::releaseBuffer(int32_t newLength) {  // closes an open getBuffer(); newLength is the new string length, or -1 to scan for a NUL
    1732           0 :   if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) {  // no-op unless a buffer is open and newLength is valid
    1733             :     // set the new fLength
    1734           0 :     int32_t capacity=getCapacity();
    1735           0 :     if(newLength==-1) {
    1736             :       // the new length is the string length, capped by fCapacity
    1737           0 :       const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
    1738           0 :       while(p<limit && *p!=0) {  // stops at the first NUL or at the end of the buffer
    1739           0 :         ++p;
    1740             :       }
    1741           0 :       newLength=(int32_t)(p-array);
    1742           0 :     } else if(newLength>capacity) {
    1743           0 :       newLength=capacity;  // an over-long length is clamped to the capacity
    1744             :     }
    1745           0 :     setLength(newLength);
    1746           0 :     fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;  // clears the "open" flag
    1747             :   }
    1748           0 : }
    1749             : 
    1750             : //========================================
    1751             : // Miscellaneous
    1752             : //========================================
    1753             : UBool  // TRUE on success (with or without a reallocation); FALSE if the string is not writable or allocation failed
    1754           0 : UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,  // required capacity; -1 means the current capacity
    1755             :                                   int32_t growCapacity,  // preferred capacity to try first; negative means "same as newCapacity"
    1756             :                                   UBool doCopyArray,  // copy the old contents into a new buffer; otherwise the length becomes zero
    1757             :                                   int32_t **pBufferToDelete,  // if non-null, receives a released ref-counted block instead of it being freed here
    1758             :                                   UBool forceClone) {  // reallocate even when none of the other conditions requires it
    1759             :   // default parameters need to be static, therefore
    1760             :   // the defaults are -1 to have convenience defaults
    1761           0 :   if(newCapacity == -1) {
    1762           0 :     newCapacity = getCapacity();
    1763             :   }
    1764             : 
    1765             :   // while a getBuffer(minCapacity) is "open",
    1766             :   // prevent any modifications of the string by returning FALSE here
    1767             :   // if the string is bogus, then only an assignment or similar can revive it
    1768           0 :   if(!isWritable()) {
    1769           0 :     return FALSE;
    1770             :   }
    1771             : 
    1772             :   /*
    1773             :    * We need to make a copy of the array if
    1774             :    * the buffer is read-only, or
    1775             :    * the buffer is refCounted (shared), and refCount>1, or
    1776             :    * the buffer is too small.
    1777             :    * Return FALSE if memory could not be allocated.
    1778             :    */
    1779           0 :   if(forceClone ||
    1780           0 :      fUnion.fFields.fLengthAndFlags & kBufferIsReadonly ||
    1781           0 :      (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) ||
    1782           0 :      newCapacity > getCapacity()
    1783             :   ) {
    1784             :     // check growCapacity for default value and use of the stack buffer
    1785           0 :     if(growCapacity < 0) {
    1786           0 :       growCapacity = newCapacity;
    1787           0 :     } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
    1788           0 :       growCapacity = US_STACKBUF_SIZE;  // the request fits the stack-buffer size, so do not ask for more than that
    1789             :     }
    1790             : 
    1791             :     // save old values
    1792             :     UChar oldStackBuffer[US_STACKBUF_SIZE];
    1793             :     UChar *oldArray;
    1794           0 :     int32_t oldLength = length();
    1795           0 :     int16_t flags = fUnion.fFields.fLengthAndFlags;  // snapshot used after allocate() for the release and for the failure rollback
    1796             : 
    1797           0 :     if(flags&kUsingStackBuffer) {
    1798           0 :       U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutually exclusive */
    1799           0 :       if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
    1800             :         // copy the stack buffer contents because it will be overwritten with
    1801             :         // fUnion.fFields values
    1802           0 :         us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength);
    1803           0 :         oldArray = oldStackBuffer;
    1804             :       } else {
    1805           0 :         oldArray = NULL; // no need to copy from the stack buffer to itself
    1806             :       }
    1807             :     } else {
    1808           0 :       oldArray = fUnion.fFields.fArray;
    1809           0 :       U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
    1810             :     }
    1811             : 
    1812             :     // allocate a new array
    1813           0 :     if(allocate(growCapacity) ||
    1814           0 :        (newCapacity < growCapacity && allocate(newCapacity))  // second attempt with the smaller, strictly required size
    1815             :     ) {
    1816           0 :       if(doCopyArray) {
    1817             :         // copy the contents
    1818             :         // do not copy more than what fits - it may be smaller than before
    1819           0 :         int32_t minLength = oldLength;
    1820           0 :         newCapacity = getCapacity();
    1821           0 :         if(newCapacity < minLength) {
    1822           0 :           minLength = newCapacity;
    1823             :         }
    1824           0 :         if(oldArray != NULL) {  // NULL means the contents were in the stack buffer and no copy was prepared above
    1825           0 :           us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
    1826             :         }
    1827           0 :         setLength(minLength);
    1828             :       } else {
    1829           0 :         setZeroLength();
    1830             :       }
    1831             : 
    1832             :       // release the old array
    1833           0 :       if(flags & kRefCounted) {
    1834             :         // the array is refCounted; decrement and release if 0
    1835           0 :         u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);  // the counter is read from the slot immediately before the array
    1836           0 :         if(umtx_atomic_dec(pRefCount) == 0) {
    1837           0 :           if(pBufferToDelete == 0) {
    1838             :               // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
    1839             :               // is defined as volatile. (Volatile has useful non-standard behavior
    1840             :               //   with this compiler.)
    1841           0 :             uprv_free((void *)pRefCount);
    1842             :           } else {
    1843             :             // the caller requested to delete it himself
    1844           0 :             *pBufferToDelete = (int32_t *)pRefCount;
    1845             :           }
    1846             :         }
    1847             :       }
    1848             :     } else {
    1849             :       // not enough memory for growCapacity and not even for the smaller newCapacity
    1850             :       // reset the old values for setToBogus() to release the array
    1851           0 :       if(!(flags&kUsingStackBuffer)) {
    1852           0 :         fUnion.fFields.fArray = oldArray;
    1853             :       }
    1854           0 :       fUnion.fFields.fLengthAndFlags = flags;
    1855           0 :       setToBogus();
    1856           0 :       return FALSE;
    1857             :     }
    1858             :   }
    1859           0 :   return TRUE;
    1860             : }
    1861             : 
    1862             : // UnicodeStringAppendable ------------------------------------------------- ***
    1863             : 
    1864           0 : UnicodeStringAppendable::~UnicodeStringAppendable() {}  // out-of-line destructor; the body is intentionally empty
    1865             : 
    1866             : UBool
    1867           0 : UnicodeStringAppendable::appendCodeUnit(UChar c) {
                         // Appends the single code unit c to str: the address of the by-value
                         // parameter is handed to doAppend() with start offset 0 and length 1.
                         // Returns isWritable() of the string that doAppend() hands back.
    1868           0 :   return str.doAppend(&c, 0, 1).isWritable();
    1869             : }
    1870             : 
    1871             : UBool
    1872           0 : UnicodeStringAppendable::appendCodePoint(UChar32 c) {
                         // Encodes the code point c into a local buffer of U16_MAX_LENGTH code
                         // units and appends the resulting cLength units to str.
                         // Returns FALSE if U16_APPEND set isError (in which case nothing is
                         // appended, because && short-circuits); otherwise returns isWritable()
                         // of the string that doAppend() hands back.
    1873             :   UChar buffer[U16_MAX_LENGTH];
    1874           0 :   int32_t cLength = 0;
    1875           0 :   UBool isError = FALSE;
                         // U16_APPEND writes into buffer starting at index cLength and advances
                         // cLength. NOTE(review): per the ICU utf16.h documentation it sets
                         // isError for an invalid code point or insufficient room - confirm there.
    1876           0 :   U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
    1877           0 :   return !isError && str.doAppend(buffer, 0, cLength).isWritable();
    1878             : }
    1879             : 
    1880             : UBool
    1881           0 : UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
                         // Forwards s, start offset 0 and length unchanged to str.doAppend().
                         // Returns isWritable() of the string that doAppend() hands back.
                         // No validation of s or length happens here; any handling of NULL or
                         // negative lengths is left to doAppend().
    1882           0 :   return str.doAppend(s, 0, length).isWritable();
    1883             : }
    1884             : 
    1885             : UBool
    1886           0 : UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
                         // Asks str.cloneArrayIfNeeded() for a capacity of the current length
                         // plus appendCapacity and returns its result unchanged.
                         // NOTE(review): unlike getAppendBuffer(), this sum is not checked against
                         // kMaxCapacity before the addition, so a very large appendCapacity could
                         // overflow int32_t - confirm whether callers can pass such values.
    1887           0 :   return str.cloneArrayIfNeeded(str.length() + appendCapacity);
    1888             : }
    1889             : 
    1890             : UChar *
    1891           0 : UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
    1892             :                                          int32_t desiredCapacityHint,
    1893             :                                          UChar *scratch, int32_t scratchCapacity,
    1894             :                                          int32_t *resultCapacity) {
                         // Hands out a buffer the caller can write appended text into.
                         //   minCapacity         - smallest acceptable buffer size; must be >= 1
                         //   desiredCapacityHint - preferred size, passed on as the second
                         //                         argument of cloneArrayIfNeeded()
                         //   scratch/scratchCapacity - caller-provided fallback buffer; must hold
                         //                         at least minCapacity units
                         //   resultCapacity      - out: capacity of the returned buffer
                         // Returns either a pointer into str's own array just past its current
                         // contents, or scratch, or NULL (with *resultCapacity == 0) when the
                         // arguments are rejected.
                         // Reject a non-positive request or a scratch buffer that is too small.
    1895           0 :   if(minCapacity < 1 || scratchCapacity < minCapacity) {
    1896           0 :     *resultCapacity = 0;
    1897           0 :     return NULL;
    1898             :   }
    1899           0 :   int32_t oldLength = str.length();
                         // Both requested sizes are compared against the room left below
                         // kMaxCapacity before being added to oldLength, so the sums passed to
                         // cloneArrayIfNeeded() cannot exceed kMaxCapacity.
    1900           0 :   if(minCapacity <= (kMaxCapacity - oldLength) &&
    1901           0 :       desiredCapacityHint <= (kMaxCapacity - oldLength) &&
    1902           0 :       str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
                           // Success: expose the unused tail of str's array, starting at the old
                           // length; its size is whatever capacity str has beyond that length.
    1903           0 :     *resultCapacity = str.getCapacity() - oldLength;
    1904           0 :     return str.getArrayStart() + oldLength;
    1905             :   }
                         // The request was too large or cloneArrayIfNeeded() returned false:
                         // fall back to the caller's scratch buffer.
    1906           0 :   *resultCapacity = scratchCapacity;
    1907           0 :   return scratch;
    1908             : }
    1909             : 
    1910             : U_NAMESPACE_END
    1911             : 
    1912             : U_NAMESPACE_USE
    1913             : 
    1914             : U_CAPI int32_t U_EXPORT2
    1915           0 : uhash_hashUnicodeString(const UElement key) {
                           // Hash callback for keys whose key.pointer refers to a UnicodeString.
                           // Returns 0 for a NULL pointer, otherwise the string's hashCode().
    1916           0 :     const UnicodeString *str = (const UnicodeString*) key.pointer;
    1917           0 :     return (str == NULL) ? 0 : str->hashCode();
    1918             : }
    1919             : 
    1920             : // Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
    1921             : // does not depend on hashtable code.
    1922             : U_CAPI UBool U_EXPORT2
    1923           0 : uhash_compareUnicodeString(const UElement key1, const UElement key2) {
                           // Equality callback for keys whose .pointer refers to a UnicodeString.
                           // Returns TRUE when both pointers are identical (including both NULL),
                           // FALSE when exactly one is NULL, and otherwise the result of
                           // UnicodeString's operator== on the two strings.
    1924           0 :     const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
    1925           0 :     const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
                           // Same object, or both NULL: equal without looking at the contents.
    1926           0 :     if (str1 == str2) {
    1927           0 :         return TRUE;
    1928             :     }
                           // Only one side is NULL here, since the both-NULL case returned above.
    1929           0 :     if (str1 == NULL || str2 == NULL) {
    1930           0 :         return FALSE;
    1931             :     }
    1932           0 :     return *str1 == *str2;
    1933             : }
    1934             : 
    1935             : #ifdef U_STATIC_IMPLEMENTATION
    1936             : /*
    1937             : This should never be called. It is defined here to make sure that the
    1938             : virtual vector deleting destructor is defined within unistr.cpp.
    1939             : The vector deleting destructor is already a part of UObject,
    1940             : but defining it here makes sure that it is included with this object file.
    1941             : This makes sure that static library dependencies are kept to a minimum.
    1942             : */
    1943           0 : static void uprv_UnicodeStringDummy(void) {
                           // Allocates an array of two UnicodeString objects and immediately
                           // releases it with delete[]. Nothing else happens; the array form of
                           // delete is the only reason this function exists.
    1944           0 :     delete [] (new UnicodeString[2]);
    1945           0 : }
    1946             : #endif

Generated by: LCOV version 1.13