LCOV - code coverage report
Current view: top level - intl/icu/source/common - utext.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 1203 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 65 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : *
       6             : *   Copyright (C) 2005-2016, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : *******************************************************************************
      10             : *   file name:  utext.cpp
      11             : *   encoding:   UTF-8
      12             : *   tab size:   8 (not used)
      13             : *   indentation:4
      14             : *
      15             : *   created on: 2005apr12
      16             : *   created by: Markus W. Scherer
      17             : */
      18             : 
      19             : #include "unicode/utypes.h"
      20             : #include "unicode/ustring.h"
      21             : #include "unicode/unistr.h"
      22             : #include "unicode/chariter.h"
      23             : #include "unicode/utext.h"
      24             : #include "unicode/utf.h"
      25             : #include "unicode/utf8.h"
      26             : #include "unicode/utf16.h"
      27             : #include "ustr_imp.h"
      28             : #include "cmemory.h"
      29             : #include "cstring.h"
      30             : #include "uassert.h"
      31             : #include "putilimp.h"
      32             : 
      33             : U_NAMESPACE_USE
      34             : 
      35             : #define I32_FLAG(bitIndex) ((int32_t)1<<(bitIndex))
      36             : 
      37             : 
      38             : static UBool
      39           0 : utext_access(UText *ut, int64_t index, UBool forward) {
      40           0 :     return ut->pFuncs->access(ut, index, forward);
      41             : }
      42             : 
      43             : 
      44             : 
      45             : U_CAPI UBool U_EXPORT2
      46           0 : utext_moveIndex32(UText *ut, int32_t delta) {
      47             :     UChar32  c;
      48           0 :     if (delta > 0) {
      49           0 :         do {
      50           0 :             if(ut->chunkOffset>=ut->chunkLength && !utext_access(ut, ut->chunkNativeLimit, TRUE)) {
      51           0 :                 return FALSE;
      52             :             }
      53           0 :             c = ut->chunkContents[ut->chunkOffset];
      54           0 :             if (U16_IS_SURROGATE(c)) {
      55           0 :                 c = utext_next32(ut);
      56           0 :                 if (c == U_SENTINEL) {
      57           0 :                     return FALSE;
      58             :                 }
      59             :             } else {
      60           0 :                 ut->chunkOffset++;
      61             :             }
      62             :         } while(--delta>0);
      63             : 
      64           0 :     } else if (delta<0) {
      65           0 :         do {
      66           0 :             if(ut->chunkOffset<=0 && !utext_access(ut, ut->chunkNativeStart, FALSE)) {
      67           0 :                 return FALSE;
      68             :             }
      69           0 :             c = ut->chunkContents[ut->chunkOffset-1];
      70           0 :             if (U16_IS_SURROGATE(c)) {
      71           0 :                 c = utext_previous32(ut);
      72           0 :                 if (c == U_SENTINEL) {
      73           0 :                     return FALSE;
      74             :                 }
      75             :             } else {
      76           0 :                 ut->chunkOffset--;
      77             :             }
      78             :         } while(++delta<0);
      79             :     }
      80             : 
      81           0 :     return TRUE;
      82             : }
      83             : 
      84             : 
      85             : U_CAPI int64_t U_EXPORT2
      86           0 : utext_nativeLength(UText *ut) {
      87           0 :     return ut->pFuncs->nativeLength(ut);
      88             : }
      89             : 
      90             : 
      91             : U_CAPI UBool U_EXPORT2
      92           0 : utext_isLengthExpensive(const UText *ut) {
      93           0 :     UBool r = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)) != 0;
      94           0 :     return r;
      95             : }
      96             : 
      97             : 
      98             : U_CAPI int64_t U_EXPORT2
      99           0 : utext_getNativeIndex(const UText *ut) {
     100           0 :     if(ut->chunkOffset <= ut->nativeIndexingLimit) {
     101           0 :         return ut->chunkNativeStart+ut->chunkOffset;
     102             :     } else {
     103           0 :         return ut->pFuncs->mapOffsetToNative(ut);
     104             :     }
     105             : }
     106             : 
     107             : 
     108             : U_CAPI void U_EXPORT2
     109           0 : utext_setNativeIndex(UText *ut, int64_t index) {
     110           0 :     if(index<ut->chunkNativeStart || index>=ut->chunkNativeLimit) {
     111             :         // The desired position is outside of the current chunk.
     112             :         // Access the new position.  Assume a forward iteration from here,
     113             :         // which will also be optimum for a single random access.
     114             :         // Reverse iterations may suffer slightly.
     115           0 :         ut->pFuncs->access(ut, index, TRUE);
     116           0 :     } else if((int32_t)(index - ut->chunkNativeStart) <= ut->nativeIndexingLimit) {
     117             :         // utf-16 indexing.
     118           0 :         ut->chunkOffset=(int32_t)(index-ut->chunkNativeStart);
     119             :     } else {
     120           0 :          ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index);
     121             :     }
     122             :     // The convention is that the index must always be on a code point boundary.
     123             :     // Adjust the index position if it is in the middle of a surrogate pair.
     124           0 :     if (ut->chunkOffset<ut->chunkLength) {
     125           0 :         UChar c= ut->chunkContents[ut->chunkOffset];
     126           0 :         if (U16_IS_TRAIL(c)) {
     127           0 :             if (ut->chunkOffset==0) {
     128           0 :                 ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE);
     129             :             }
     130           0 :             if (ut->chunkOffset>0) {
     131           0 :                 UChar lead = ut->chunkContents[ut->chunkOffset-1];
     132           0 :                 if (U16_IS_LEAD(lead)) {
     133           0 :                     ut->chunkOffset--;
     134             :                 }
     135             :             }
     136             :         }
     137             :     }
     138           0 : }
     139             : 
     140             : 
     141             : 
     142             : U_CAPI int64_t U_EXPORT2
     143           0 : utext_getPreviousNativeIndex(UText *ut) {
     144             :     //
     145             :     //  Fast-path the common case.
     146             :     //     Common means current position is not at the beginning of a chunk
     147             :     //     and the preceding character is not supplementary.
     148             :     //
     149           0 :     int32_t i = ut->chunkOffset - 1;
     150             :     int64_t result;
     151           0 :     if (i >= 0) {
     152           0 :         UChar c = ut->chunkContents[i];
     153           0 :         if (U16_IS_TRAIL(c) == FALSE) {
     154           0 :             if (i <= ut->nativeIndexingLimit) {
     155           0 :                 result = ut->chunkNativeStart + i;
     156             :             } else {
     157           0 :                 ut->chunkOffset = i;
     158           0 :                 result = ut->pFuncs->mapOffsetToNative(ut);
     159           0 :                 ut->chunkOffset++;
     160             :             }
     161           0 :             return result;
     162             :         }
     163             :     }
     164             : 
     165             :     // If at the start of text, simply return 0.
     166           0 :     if (ut->chunkOffset==0 && ut->chunkNativeStart==0) {
     167           0 :         return 0;
     168             :     }
     169             : 
     170             :     // Harder, less common cases.  We are at a chunk boundary, or on a surrogate.
     171             :     //    Keep it simple, use other functions to handle the edges.
     172             :     //
     173           0 :     utext_previous32(ut);
     174           0 :     result = UTEXT_GETNATIVEINDEX(ut);
     175           0 :     utext_next32(ut);
     176           0 :     return result;
     177             : }
     178             : 
     179             : 
     180             : //
     181             : //  utext_current32.  Get the UChar32 at the current position.
     182             : //                    UText iteration position is always on a code point boundary,
     183             : //                    never on the trail half of a surrogate pair.
     184             : //
     185             : U_CAPI UChar32 U_EXPORT2
     186           0 : utext_current32(UText *ut) {
     187             :     UChar32  c;
     188           0 :     if (ut->chunkOffset==ut->chunkLength) {
     189             :         // Current position is just off the end of the chunk.
     190           0 :         if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) {
     191             :             // Off the end of the text.
     192           0 :             return U_SENTINEL;
     193             :         }
     194             :     }
     195             : 
     196           0 :     c = ut->chunkContents[ut->chunkOffset];
     197           0 :     if (U16_IS_LEAD(c) == FALSE) {
     198             :         // Normal, non-supplementary case.
     199           0 :         return c;
     200             :     }
     201             : 
     202             :     //
     203             :     //  Possible supplementary char.
     204             :     //
     205           0 :     UChar32   trail = 0;
     206           0 :     UChar32   supplementaryC = c;
     207           0 :     if ((ut->chunkOffset+1) < ut->chunkLength) {
     208             :         // The trail surrogate is in the same chunk.
     209           0 :         trail = ut->chunkContents[ut->chunkOffset+1];
     210             :     } else {
     211             :         //  The trail surrogate is in a different chunk.
     212             :         //     Because we must maintain the iteration position, we need to switch forward
     213             :         //     into the new chunk, get the trail surrogate, then revert the chunk back to the
     214             :         //     original one.
     215             :         //     An edge case to be careful of:  the entire text may end with an unpaired
     216             :         //        leading surrogate.  The attempt to access the trail will fail, but
     217             :         //        the original position before the unpaired lead still needs to be restored.
     218           0 :         int64_t  nativePosition = ut->chunkNativeLimit;
     219           0 :         int32_t  originalOffset = ut->chunkOffset;
     220           0 :         if (ut->pFuncs->access(ut, nativePosition, TRUE)) {
     221           0 :             trail = ut->chunkContents[ut->chunkOffset];
     222             :         }
     223           0 :         UBool r = ut->pFuncs->access(ut, nativePosition, FALSE);  // reverse iteration flag loads preceding chunk
     224           0 :         U_ASSERT(r==TRUE);
     225           0 :         ut->chunkOffset = originalOffset;
     226           0 :         if(!r) {
     227           0 :             return U_SENTINEL;
     228             :         }
     229             :     }
     230             : 
     231           0 :     if (U16_IS_TRAIL(trail)) {
     232           0 :         supplementaryC = U16_GET_SUPPLEMENTARY(c, trail);
     233             :     }
     234           0 :     return supplementaryC;
     235             : 
     236             : }
     237             : 
     238             : 
     239             : U_CAPI UChar32 U_EXPORT2
     240           0 : utext_char32At(UText *ut, int64_t nativeIndex) {
     241           0 :     UChar32 c = U_SENTINEL;
     242             : 
     243             :     // Fast path the common case.
     244           0 :     if (nativeIndex>=ut->chunkNativeStart && nativeIndex < ut->chunkNativeStart + ut->nativeIndexingLimit) {
     245           0 :         ut->chunkOffset = (int32_t)(nativeIndex - ut->chunkNativeStart);
     246           0 :         c = ut->chunkContents[ut->chunkOffset];
     247           0 :         if (U16_IS_SURROGATE(c) == FALSE) {
     248           0 :             return c;
     249             :         }
     250             :     }
     251             : 
     252             : 
     253           0 :     utext_setNativeIndex(ut, nativeIndex);
     254           0 :     if (nativeIndex>=ut->chunkNativeStart && ut->chunkOffset<ut->chunkLength) {
     255           0 :         c = ut->chunkContents[ut->chunkOffset];
     256           0 :         if (U16_IS_SURROGATE(c)) {
     257             :             // For surrogates, let current32() deal with the complications
     258             :             //    of supplementaries that may span chunk boundaries.
     259           0 :             c = utext_current32(ut);
     260             :         }
     261             :     }
     262           0 :     return c;
     263             : }
     264             : 
     265             : 
     266             : U_CAPI UChar32 U_EXPORT2
     267           0 : utext_next32(UText *ut) {
     268             :     UChar32       c;
     269             : 
     270           0 :     if (ut->chunkOffset >= ut->chunkLength) {
     271           0 :         if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) {
     272           0 :             return U_SENTINEL;
     273             :         }
     274             :     }
     275             : 
     276           0 :     c = ut->chunkContents[ut->chunkOffset++];
     277           0 :     if (U16_IS_LEAD(c) == FALSE) {
     278             :         // Normal case, not supplementary.
     279             :         //   (A trail surrogate seen here is just returned as is, as a surrogate value.
     280             :         //    It cannot be part of a pair.)
     281           0 :         return c;
     282             :     }
     283             : 
     284           0 :     if (ut->chunkOffset >= ut->chunkLength) {
     285           0 :         if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) {
     286             :             // c is an unpaired lead surrogate at the end of the text.
     287             :             // return it as it is.
     288           0 :             return c;
     289             :         }
     290             :     }
     291           0 :     UChar32 trail = ut->chunkContents[ut->chunkOffset];
     292           0 :     if (U16_IS_TRAIL(trail) == FALSE) {
     293             :         // c was an unpaired lead surrogate, not at the end of the text.
     294             :         // return it as it is (unpaired).  Iteration position is on the
     295             :         // following character, possibly in the next chunk, where the
     296             :         //  trail surrogate would have been if it had existed.
     297           0 :         return c;
     298             :     }
     299             : 
     300           0 :     UChar32 supplementary = U16_GET_SUPPLEMENTARY(c, trail);
     301           0 :     ut->chunkOffset++;   // move iteration position over the trail surrogate.
     302           0 :     return supplementary;
     303             :     }
     304             : 
     305             : 
     306             : U_CAPI UChar32 U_EXPORT2
     307           0 : utext_previous32(UText *ut) {
     308             :     UChar32       c;
     309             : 
     310           0 :     if (ut->chunkOffset <= 0) {
     311           0 :         if (ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) {
     312           0 :             return U_SENTINEL;
     313             :         }
     314             :     }
     315           0 :     ut->chunkOffset--;
     316           0 :     c = ut->chunkContents[ut->chunkOffset];
     317           0 :     if (U16_IS_TRAIL(c) == FALSE) {
     318             :         // Normal case, not supplementary.
     319             :         //   (A lead surrogate seen here is just returned as is, as a surrogate value.
     320             :         //    It cannot be part of a pair.)
     321           0 :         return c;
     322             :     }
     323             : 
     324           0 :     if (ut->chunkOffset <= 0) {
     325           0 :         if (ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) {
     326             :             // c is an unpaired trail surrogate at the start of the text.
     327             :             // return it as it is.
     328           0 :             return c;
     329             :         }
     330             :     }
     331             : 
     332           0 :     UChar32 lead = ut->chunkContents[ut->chunkOffset-1];
     333           0 :     if (U16_IS_LEAD(lead) == FALSE) {
     334             :         // c was an unpaired trail surrogate, not at the start of the text.
     335             :         // return it as it is (unpaired).  Iteration position is at c
     336           0 :         return c;
     337             :     }
     338             : 
     339           0 :     UChar32 supplementary = U16_GET_SUPPLEMENTARY(lead, c);
     340           0 :     ut->chunkOffset--;   // move iteration position over the lead surrogate.
     341           0 :     return supplementary;
     342             : }
     343             : 
     344             : 
     345             : 
     346             : U_CAPI UChar32 U_EXPORT2
     347           0 : utext_next32From(UText *ut, int64_t index) {
     348           0 :     UChar32       c      = U_SENTINEL;
     349             : 
     350           0 :     if(index<ut->chunkNativeStart || index>=ut->chunkNativeLimit) {
     351             :         // Desired position is outside of the current chunk.
     352           0 :         if(!ut->pFuncs->access(ut, index, TRUE)) {
     353             :             // no chunk available here
     354           0 :             return U_SENTINEL;
     355             :         }
     356           0 :     } else if (index - ut->chunkNativeStart  <= (int64_t)ut->nativeIndexingLimit) {
     357             :         // Desired position is in chunk, with direct 1:1 native to UTF16 indexing
     358           0 :         ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);
     359             :     } else {
     360             :         // Desired position is in chunk, with non-UTF16 indexing.
     361           0 :         ut->chunkOffset = ut->pFuncs->mapNativeIndexToUTF16(ut, index);
     362             :     }
     363             : 
     364           0 :     c = ut->chunkContents[ut->chunkOffset++];
     365           0 :     if (U16_IS_SURROGATE(c)) {
     366             :         // Surrogates.  Many edge cases.  Use other functions that already
     367             :         //              deal with the problems.
     368           0 :         utext_setNativeIndex(ut, index);
     369           0 :         c = utext_next32(ut);
     370             :     }
     371           0 :     return c;
     372             : }
     373             : 
     374             : 
     375             : U_CAPI UChar32 U_EXPORT2
     376           0 : utext_previous32From(UText *ut, int64_t index) {
     377             :     //
     378             :     //  Return the character preceding the specified index.
     379             :     //  Leave the iteration position at the start of the character that was returned.
     380             :     //
     381             :     UChar32     cPrev;    // The character preceding cCurr, which is what we will return.
     382             : 
     383             :     // Address the chunk containing the position preceding the incoming index
     384             :     // A tricky edge case:
     385             :     //   We try to test the requested native index against the chunkNativeStart to determine
     386             :     //    whether the character preceding the one at the index is in the current chunk.
     387             :     //    BUT, this test can fail with UTF-8 (or any other multibyte encoding), when the
     388             :     //    requested index is on something other than the first position of the first char.
     389             :     //
     390           0 :     if(index<=ut->chunkNativeStart || index>ut->chunkNativeLimit) {
     391             :         // Requested native index is outside of the current chunk.
     392           0 :         if(!ut->pFuncs->access(ut, index, FALSE)) {
     393             :             // no chunk available here
     394           0 :             return U_SENTINEL;
     395             :         }
     396           0 :     } else if(index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit) {
     397             :         // Direct UTF-16 indexing.
     398           0 :         ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);
     399             :     } else {
     400           0 :         ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index);
     401           0 :         if (ut->chunkOffset==0 && !ut->pFuncs->access(ut, index, FALSE)) {
     402             :             // no chunk available here
     403           0 :             return U_SENTINEL;
     404             :         }
     405             :     }
     406             : 
     407             :     //
     408             :     // Simple case with no surrogates.
     409             :     //
     410           0 :     ut->chunkOffset--;
     411           0 :     cPrev = ut->chunkContents[ut->chunkOffset];
     412             : 
     413           0 :     if (U16_IS_SURROGATE(cPrev)) {
     414             :         // Possible supplementary.  Many edge cases.
     415             :         // Let other functions do the heavy lifting.
     416           0 :         utext_setNativeIndex(ut, index);
     417           0 :         cPrev = utext_previous32(ut);
     418             :     }
     419           0 :     return cPrev;
     420             : }
     421             : 
     422             : 
     423             : U_CAPI int32_t U_EXPORT2
     424           0 : utext_extract(UText *ut,
     425             :              int64_t start, int64_t limit,
     426             :              UChar *dest, int32_t destCapacity,
     427             :              UErrorCode *status) {
     428           0 :                  return ut->pFuncs->extract(ut, start, limit, dest, destCapacity, status);
     429             :              }
     430             : 
     431             : 
     432             : 
     433             : U_CAPI UBool U_EXPORT2
     434           0 : utext_equals(const UText *a, const UText *b) {
     435           0 :     if (a==NULL || b==NULL ||
     436           0 :         a->magic != UTEXT_MAGIC ||
     437           0 :         b->magic != UTEXT_MAGIC) {
     438             :             // Null or invalid arguments don't compare equal to anything.
     439           0 :             return FALSE;
     440             :     }
     441             : 
     442           0 :     if (a->pFuncs != b->pFuncs) {
     443             :         // Different types of text providers.
     444           0 :         return FALSE;
     445             :     }
     446             : 
     447           0 :     if (a->context != b->context) {
     448             :         // Different sources (different strings)
     449           0 :         return FALSE;
     450             :     }
     451           0 :     if (utext_getNativeIndex(a) != utext_getNativeIndex(b)) {
     452             :         // Different current position in the string.
     453           0 :         return FALSE;
     454             :     }
     455             : 
     456           0 :     return TRUE;
     457             : }
     458             : 
     459             : U_CAPI UBool U_EXPORT2
     460           0 : utext_isWritable(const UText *ut)
     461             : {
     462           0 :     UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) != 0;
     463           0 :     return b;
     464             : }
     465             : 
     466             : 
     467             : U_CAPI void U_EXPORT2
     468           0 : utext_freeze(UText *ut) {
     469             :     // Zero out the WRITABLE flag.
     470           0 :     ut->providerProperties &= ~(I32_FLAG(UTEXT_PROVIDER_WRITABLE));
     471           0 : }
     472             : 
     473             : 
     474             : U_CAPI UBool U_EXPORT2
     475           0 : utext_hasMetaData(const UText *ut)
     476             : {
     477           0 :     UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA)) != 0;
     478           0 :     return b;
     479             : }
     480             : 
     481             : 
     482             : 
     483             : U_CAPI int32_t U_EXPORT2
     484           0 : utext_replace(UText *ut,
     485             :              int64_t nativeStart, int64_t nativeLimit,
     486             :              const UChar *replacementText, int32_t replacementLength,
     487             :              UErrorCode *status)
     488             : {
     489           0 :     if (U_FAILURE(*status)) {
     490           0 :         return 0;
     491             :     }
     492           0 :     if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) == 0) {
     493           0 :         *status = U_NO_WRITE_PERMISSION;
     494           0 :         return 0;
     495             :     }
     496           0 :     int32_t i = ut->pFuncs->replace(ut, nativeStart, nativeLimit, replacementText, replacementLength, status);
     497           0 :     return i;
     498             : }
     499             : 
     500             : U_CAPI void U_EXPORT2
     501           0 : utext_copy(UText *ut,
     502             :           int64_t nativeStart, int64_t nativeLimit,
     503             :           int64_t destIndex,
     504             :           UBool move,
     505             :           UErrorCode *status)
     506             : {
     507           0 :     if (U_FAILURE(*status)) {
     508           0 :         return;
     509             :     }
     510           0 :     if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) == 0) {
     511           0 :         *status = U_NO_WRITE_PERMISSION;
     512           0 :         return;
     513             :     }
     514           0 :     ut->pFuncs->copy(ut, nativeStart, nativeLimit, destIndex, move, status);
     515             : }
     516             : 
     517             : 
     518             : 
     519             : U_CAPI UText * U_EXPORT2
     520           0 : utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status) {
     521           0 :     if (U_FAILURE(*status)) {
     522           0 :         return dest;
     523             :     }
     524           0 :     UText *result = src->pFuncs->clone(dest, src, deep, status);
     525           0 :     if (U_FAILURE(*status)) {
     526           0 :         return result;
     527             :     }
     528           0 :     if (result == NULL) {
     529           0 :         *status = U_MEMORY_ALLOCATION_ERROR;
     530           0 :         return result;
     531             :     }
     532           0 :     if (readOnly) {
     533           0 :         utext_freeze(result);
     534             :     }
     535           0 :     return result;
     536             : }
     537             : 
     538             : 
     539             : 
     540             : //------------------------------------------------------------------------------
     541             : //
     542             : //   UText common functions implementation
     543             : //
     544             : //------------------------------------------------------------------------------
     545             : 
     546             : //
     547             : //  UText.flags bit definitions
     548             : //
     549             : enum {
     550             :     UTEXT_HEAP_ALLOCATED  = 1,      //  1 if ICU has allocated this UText struct on the heap.
     551             :                                     //  0 if caller provided storage for the UText.
     552             : 
     553             :     UTEXT_EXTRA_HEAP_ALLOCATED = 2, //  1 if ICU has allocated extra storage as a separate
     554             :                                     //     heap block.
     555             :                                     //  0 if there is no separate allocation.  Either no extra
     556             :                                     //     storage was requested, or it is appended to the end
     557             :                                     //     of the main UText storage.
     558             : 
     559             :     UTEXT_OPEN = 4                  //  1 if this UText is currently open
     560             :                                     //  0 if this UText is not open.
     561             : };
     562             : 
     563             : 
     564             : //
     565             : //  Extended form of a UText.  The purpose is to aid in computing the total size required
     566             : //    when a provider asks for a UText to be allocated with extra storage.
     567             : 
     568             : struct ExtendedUText {
     569             :     UText          ut;
     570             :     UAlignedMemory extension;
     571             : };
     572             : 
     573             : static const UText emptyText = UTEXT_INITIALIZER;
     574             : // utext_setup  Common set-up of a UText: heap-allocates one when ut is NULL, otherwise validates and reuses the caller's; returns ut, or NULL if the UText itself could not be allocated.
     575             : U_CAPI UText * U_EXPORT2
     576           0 : utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status) {
     577           0 :     if (U_FAILURE(*status)) {
     578           0 :         return ut;                      // an error is already pending: hand ut back untouched
     579             :     }
     580             : 
     581           0 :     if (ut == NULL) {
     582             :         // We need to heap-allocate storage for the new UText
     583           0 :         int32_t spaceRequired = sizeof(UText);
     584           0 :         if (extraSpace > 0) {
     585           0 :             spaceRequired = sizeof(ExtendedUText) + extraSpace - sizeof(UAlignedMemory);   // ExtendedUText already includes one UAlignedMemory; do not count it twice
     586             :         }
     587           0 :         ut = (UText *)uprv_malloc(spaceRequired);
     588           0 :         if (ut == NULL) {
     589           0 :             *status = U_MEMORY_ALLOCATION_ERROR;
     590           0 :             return NULL;
     591             :         } else {
     592           0 :             *ut = emptyText;
     593           0 :             ut->flags |= UTEXT_HEAP_ALLOCATED;
     594           0 :             if (spaceRequired>0) {      // NOTE(review): spaceRequired starts at sizeof(UText), so this is true even when extraSpace is 0; extraSpace>0 may have been intended - confirm
     595           0 :                 ut->extraSize = extraSpace;
     596           0 :                 ut->pExtra    = &((ExtendedUText *)ut)->extension;   // extra storage is the tail of the same heap block, not a separate allocation
     597             :             }
     598             :         }
     599             :     } else {
     600             :         // We have been supplied with an already existing UText.
     601             :         // Verify that it really appears to be a UText.
     602           0 :         if (ut->magic != UTEXT_MAGIC) {
     603           0 :             *status = U_ILLEGAL_ARGUMENT_ERROR;
     604           0 :             return ut;
     605             :         }
     606             :         // If the ut is already open and there's a provider supplied close
     607             :         //   function, call it.
     608           0 :         if ((ut->flags & UTEXT_OPEN) && ut->pFuncs->close != NULL)  {
     609           0 :             ut->pFuncs->close(ut);
     610             :         }
     611           0 :         ut->flags &= ~UTEXT_OPEN;       // remains closed unless the success path below re-opens it
     612             : 
     613             :         // If extra space was requested by our caller, check whether
     614             :         //   sufficient space already exists, and allocate new space if needed.
     615           0 :         if (extraSpace > ut->extraSize) {
     616             :             // Need more space.  If there is existing separately allocated space,
     617             :             //   delete it first, then allocate new space.
     618           0 :             if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) {
     619           0 :                 uprv_free(ut->pExtra);
     620           0 :                 ut->extraSize = 0;
     621             :             }
     622           0 :             ut->pExtra = uprv_malloc(extraSpace);
     623           0 :             if (ut->pExtra == NULL) {
     624           0 :                 *status = U_MEMORY_ALLOCATION_ERROR;   // NOTE(review): pExtra is now NULL, but extraSize was only zeroed if the old block was freed above - confirm this is intended
     625             :             } else {
     626           0 :                 ut->extraSize = extraSpace;
     627           0 :                 ut->flags |= UTEXT_EXTRA_HEAP_ALLOCATED;
     628             :             }
     629             :         }
     630             :     }
     631           0 :     if (U_SUCCESS(*status)) {
     632           0 :         ut->flags |= UTEXT_OPEN;
     633             : 
     634             :         // Initialize all remaining fields of the UText.
     635             :         //   (flags, pExtra and extraSize were established above and are not reset here.)
     636           0 :         ut->context             = NULL;
     637           0 :         ut->chunkContents       = NULL;
     638           0 :         ut->p                   = NULL;
     639           0 :         ut->q                   = NULL;
     640           0 :         ut->r                   = NULL;
     641           0 :         ut->a                   = 0;
     642           0 :         ut->b                   = 0;
     643           0 :         ut->c                   = 0;
     644           0 :         ut->chunkOffset         = 0;
     645           0 :         ut->chunkLength         = 0;
     646           0 :         ut->chunkNativeStart    = 0;
     647           0 :         ut->chunkNativeLimit    = 0;
     648           0 :         ut->nativeIndexingLimit = 0;
     649           0 :         ut->providerProperties  = 0;
     650           0 :         ut->privA               = 0;
     651           0 :         ut->privB               = 0;
     652           0 :         ut->privC               = 0;
     653           0 :         ut->privP               = NULL;
     654           0 :         if (ut->pExtra!=NULL && ut->extraSize>0)
     655           0 :             uprv_memset(ut->pExtra, 0, ut->extraSize);   // extra storage is handed out zero-filled
     656             : 
     657             :     }
     658           0 :     return ut;
     659             : }
     660             : 
     661             : // utext_close  Close an open UText: run the provider's close function, free storage that utext_setup allocated, and return NULL if the UText itself was freed, otherwise ut.
     662             : U_CAPI UText * U_EXPORT2
     663           0 : utext_close(UText *ut) {
     664           0 :     if (ut==NULL ||
     665           0 :         ut->magic != UTEXT_MAGIC ||
     666           0 :         (ut->flags & UTEXT_OPEN) == 0)
     667             :     {
     668             :         // The supplied ut is not an open UText.
     669             :         // Do nothing.
     670           0 :         return ut;
     671             :     }
     672             : 
     673             :     // If the provider gave us a close function, call it now.
     674             :     // This will clean up anything allocated specifically by the provider.
     675           0 :     if (ut->pFuncs->close != NULL) {
     676           0 :         ut->pFuncs->close(ut);
     677             :     }
     678           0 :     ut->flags &= ~UTEXT_OPEN;
     679             : 
     680             :     // If we (the framework) allocated the UText or subsidiary storage,
     681             :     //   delete it.  The separately allocated extra storage goes first.
     682           0 :     if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) {
     683           0 :         uprv_free(ut->pExtra);
     684           0 :         ut->pExtra = NULL;
     685           0 :         ut->flags &= ~UTEXT_EXTRA_HEAP_ALLOCATED;
     686           0 :         ut->extraSize = 0;
     687             :     }
     688             : 
     689             :     // Zero out function table of the closed UText.  This is a defensive move,
     690             :     //   intended to cause applications that inadvertently use a closed
     691             :     //   utext to crash with null pointer errors.
     692           0 :     ut->pFuncs        = NULL;
     693             : 
     694           0 :     if (ut->flags & UTEXT_HEAP_ALLOCATED) {
     695             :         // This UText was allocated by utext_setup.  We need to free it.
     696             :         // Clear magic, so we can detect if the user messes up and immediately
     697             :         //  tries to reopen another UText using the deleted storage.
     698           0 :         ut->magic = 0;
     699           0 :         uprv_free(ut);
     700           0 :         ut = NULL;                      // callers receive NULL once the struct itself is gone
     701             :     }
     702           0 :     return ut;
     703             : }
     704             : 
     705             : 
     706             : 
     707             : 
     708             : //
     709             : // invalidateChunk   Reset a chunk to have no contents, so that the next call
     710             : //                   to access will cause new data to load.
     711             : //                   This is needed when copy/move/replace operate directly on the
     712             : //                   backing text, potentially putting it out of sync with the
     713             : //                   contents of the chunk.  Only the chunk bookkeeping fields are
     714             : //                   zeroed; chunkContents itself is left untouched.
     715             : static void
     716           0 : invalidateChunk(UText *ut) {
     717           0 :     ut->chunkLength = 0;
     718           0 :     ut->chunkNativeLimit = 0;
     719           0 :     ut->chunkNativeStart = 0;
     720           0 :     ut->chunkOffset = 0;
     721           0 :     ut->nativeIndexingLimit = 0;
     722           0 : }
     723             : 
     724             : //
     725             : // pinIndex        Do range pinning on a native index parameter.
     726             : //                 The 64 bit index is pinned in place to the range 0 .. limit.
     727             : //                 The pinned value, truncated to 32 bits, is returned as a convenience
     728             : //                        for use in providers that don't need 64 bits.
     729             : static int32_t
     730           0 : pinIndex(int64_t &index, int64_t limit) {
     731           0 :     if (index<0) {
     732           0 :         index = 0;
     733           0 :     } else if (index > limit) {
     734           0 :         index = limit;
     735             :     }
     736           0 :     return (int32_t)index;              // plain truncating cast; no range check against int32_t is made here
     737             : }
     738             : 
     739             : 
     740             : U_CDECL_BEGIN
     741             : 
     742             : //
     743             : // Pointer relocation function,
     744             : //   a utility used by shallow clone.
     745             : //   Adjust a pointer that refers to something within one UText (the source)
     746             : //   to refer to the same relative offset within another UText (the target).
     747             : //   Pointers that refer to neither the source's extra storage nor the source struct are left unchanged.
     748           0 : static void adjustPointer(UText *dest, const void **destPtr, const UText *src) {
     749             :     // convert all pointers to (char *) so that byte address arithmetic will work.
     750           0 :     char  *dptr = (char *)*destPtr;
     751           0 :     char  *dUText = (char *)dest;
     752           0 :     char  *sUText = (char *)src;
     753             : 
     754           0 :     if (dptr >= (char *)src->pExtra && dptr < ((char*)src->pExtra)+src->extraSize) {
     755             :         // target ptr was to something within the src UText's pExtra storage.
     756             :         //   relocate it into the target UText's pExtra region.
     757           0 :         *destPtr = ((char *)dest->pExtra) + (dptr - (char *)src->pExtra);
     758           0 :     } else if (dptr>=sUText && dptr < sUText+src->sizeOfStruct) {
     759             :         // target ptr was pointing to somewhere within the source UText itself.
     760             :         //   Move it to the same offset within the target UText.
     761           0 :         *destPtr = dUText + (dptr-sUText);
     762             :     }
     763           0 : }
     764             : 
     765             : 
     766             : //
     767             : //  Clone.  This is a generic copy-the-utext-by-value clone function that can be
     768             : //          used as-is with some utext types, and as a helper by other clones.
     769             : //          Returns NULL if *status already indicates failure on entry, otherwise whatever utext_setup returned for dest.
     770             : static UText * U_CALLCONV
     771           0 : shallowTextClone(UText * dest, const UText * src, UErrorCode * status) {
     772           0 :     if (U_FAILURE(*status)) {
     773           0 :         return NULL;
     774             :     }
     775           0 :     int32_t  srcExtraSize = src->extraSize;
     776             : 
     777             :     //
     778             :     // Use the generic utext_setup to allocate storage if required.
     779             :     //
     780           0 :     dest = utext_setup(dest, srcExtraSize, status);
     781           0 :     if (U_FAILURE(*status)) {
     782           0 :         return dest;
     783             :     }
     784             : 
     785             :     //
     786             :     //  flags (how the UText was allocated) and the pointer to the
     787             :     //   extra storage must retain the values in the cloned utext that
     788             :     //   were set up by utext_setup.  Save them separately before
     789             :     //   copying the whole struct.
     790             :     //
     791           0 :     void *destExtra = dest->pExtra;
     792           0 :     int32_t flags   = dest->flags;
     793             : 
     794             : 
     795             :     //
     796             :     //  Copy the whole UText struct by value, limited to the smaller of the two struct sizes.
     797             :     //  Any "Extra" storage is copied also.
     798             :     //
     799           0 :     int sizeToCopy = src->sizeOfStruct;
     800           0 :     if (sizeToCopy > dest->sizeOfStruct) {
     801           0 :         sizeToCopy = dest->sizeOfStruct;
     802             :     }
     803           0 :     uprv_memcpy(dest, src, sizeToCopy);
     804           0 :     dest->pExtra = destExtra;           // put back the values the struct copy just overwrote
     805           0 :     dest->flags  = flags;
     806           0 :     if (srcExtraSize > 0) {
     807           0 :         uprv_memcpy(dest->pExtra, src->pExtra, srcExtraSize);
     808             :     }
     809             : 
     810             :     //
     811             :     // Relocate any pointers in the target that refer to the UText itself
     812             :     //   to point to the cloned copy rather than the original source.
     813             :     //
     814           0 :     adjustPointer(dest, &dest->context, src);
     815           0 :     adjustPointer(dest, &dest->p, src);
     816           0 :     adjustPointer(dest, &dest->q, src);
     817           0 :     adjustPointer(dest, &dest->r, src);
     818           0 :     adjustPointer(dest, (const void **)&dest->chunkContents, src);
     819             : 
     820             :     // The newly shallow-cloned UText does _not_ own the underlying storage for the text.
     821             :     // (The source for the clone may or may not have owned the text.)
     822             : 
     823           0 :     dest->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
     824             : 
     825           0 :     return dest;
     826             : }
     827             : 
     828             : 
     829             : U_CDECL_END
     830             : 
     831             : 
     832             : 
     833             : //------------------------------------------------------------------------------
     834             : //
     835             : //     UText implementation for UTF-8 char * strings (read-only)
     836             : //     Limitation:  string length must be <= 0x7fffffff.
     837             : //                  (length must fit in an int32_t variable)
     838             : //
     839             : //         Use of UText data members:
     840             : //              context    pointer to UTF-8 string
     841             : //              utext.b    is the input string length (bytes).
     842             : //              utext.c    Length scanned so far in string
     843             : //                           (for optimizing finding length of zero terminated strings.)
     844             : //              utext.p    pointer to the current buffer
     845             : //              utext.q    pointer to the other buffer.
     846             : //
     847             : //------------------------------------------------------------------------------
     848             : 
     849             : // Chunk size.
     850             : //     Must be less than 42  (256/6), because the maps between UChar indexes and native indexes are byte sized.
     851             : //     Worst case there are six UTF-8 bytes per UChar.
     852             : //         obsolete 6 byte form fd + 5 trails maps to fffd
     853             : //         obsolete 5 byte form fc + 4 trails maps to fffd
     854             : //         non-shortest 4 byte forms map to fffd
     855             : //         normal supplementaries map to a pair of utf-16, two utf8 bytes per utf-16 unit
     856             : //     mapToUChars array size must allow for the worst case, 6.
     857             : //     This could be brought down to 4, by treating fd and fc as pure illegal,
     858             : //     rather than obsolete lead bytes. But that is not compatible with the utf-8 access macros.
     859             : //
     860             : enum { UTF8_TEXT_CHUNK_SIZE=32 };
     861             : 
     862             : //
     863             : // UTF8Buf  Two of these structs will be set up in the UText's extra allocated space.
     864             : //          Each contains the UChar chunk buffer, the to and from native maps, and
     865             : //          header info.
     866             : //
     867             : //     Because backwards iteration fills the buffers starting at the end and
     868             : //     working towards the front, the filled part of the buffers may not begin
     869             : //     at the start of the available storage for the buffers.
     870             : //
     871             : //     Buffer size is bigger than the specified UTF8_TEXT_CHUNK_SIZE to allow for
     872             : //     the last character added being a supplementary, and thus requiring a surrogate
     873             : //     pair.  Doing this is simpler than checking for the edge case.
     874             : //     (The declarations below use UTF8_TEXT_CHUNK_SIZE+4 for buf and mapToNative.)
     875             : 
     876             : struct UTF8Buf {
     877             :     int32_t   bufNativeStart;                        // Native index of first char in UChar buf
     878             :     int32_t   bufNativeLimit;                        // Native index following last char in buf.
     879             :     int32_t   bufStartIdx;                           // First filled position in buf.
     880             :     int32_t   bufLimitIdx;                           // Limit of filled range in buf.
     881             :     int32_t   bufNILimit;                            // Limit of native indexing part of buf
     882             :     int32_t   toUCharsMapStart;                      // Native index corresponding to
     883             :                                                      //   mapToUChars[0].
     884             :                                                      //   Set to bufNativeStart when filling forwards.
     885             :                                                      //   Set to computed value when filling backwards.
     886             : 
     887             :     UChar     buf[UTF8_TEXT_CHUNK_SIZE+4];           // The UChar buffer.  Requires one extra position beyond
     888             :                                                      //   the chunk size, to allow for surrogate at the end.
     889             :                                                      //   Length must be identical to mapToNative array, below,
     890             :                                                      //   because of the way indexing works when the array is
     891             :                                                      //   filled backwards during a reverse iteration.  Thus,
     892             :                                                      //   the additional extra size.
     893             :     uint8_t   mapToNative[UTF8_TEXT_CHUNK_SIZE+4];   // map UChar index in buf to
     894             :                                                      //  native offset from bufNativeStart.
     895             :                                                      //  Requires two extra slots,
     896             :                                                      //    one for a supplementary starting in the last normal position,
     897             :                                                      //    and one for an entry for the buffer limit position.
     898             :     uint8_t   mapToUChars[UTF8_TEXT_CHUNK_SIZE*6+6]; // Map native offset from bufNativeStart to
     899             :                                                      //   corresponding offset in filled part of buf.
     900             :     int32_t   align;                                 // presumably padding to keep the struct size int32_t aligned - TODO confirm
     901             : };
     902             : 
     903             : U_CDECL_BEGIN
     904             : 
     905             : //
     906             : //   utf8TextLength
     907             : //
     908             : //        Get the length of the string, in bytes.  If we don't already know it
     909             : //              (ut->b is negative), we'll need to scan for the trailing nul.
     910             : //              The result is cached in ut->b, clipped to 0x7fffffff.
     911             : static int64_t U_CALLCONV
     912           0 : utf8TextLength(UText *ut) {
     913           0 :     if (ut->b < 0) {
     914             :         // Zero terminated string, and we haven't scanned to the end yet.
     915             :         // Scan it now, resuming from the position already reached (ut->c).
     916           0 :         const char *r = (const char *)ut->context + ut->c;
     917           0 :         while (*r != 0) {
     918           0 :             r++;
     919             :         }
     920           0 :         if ((r - (const char *)ut->context) < 0x7fffffff) {
     921           0 :             ut->b = (int32_t)(r - (const char *)ut->context);
     922             :         } else {
     923             :             // Actual string was bigger (more than 2 gig) than we
     924             :             //   can handle.  Clip it to 2 GB.
     925           0 :             ut->b = 0x7fffffff;
     926             :         }
     927           0 :         ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);   // length is now known, so it is no longer expensive to obtain
     928             :     }
     929           0 :     return ut->b;
     930             : }
     931             : 
     932             : 
     933             : 
     934             : 
     935             : 
     936             : 
     937             : static UBool U_CALLCONV
     938           0 : utf8TextAccess(UText *ut, int64_t index, UBool forward) {
     939             :     //
     940             :     //  Apologies to those who are allergic to goto statements.
     941             :     //    Consider each goto to a labelled block to be the equivalent of
     942             :     //         call the named block as if it were a function();
     943             :     //         return;
     944             :     //
     945           0 :     const uint8_t *s8=(const uint8_t *)ut->context;
     946           0 :     UTF8Buf *u8b = NULL;
     947           0 :     int32_t  length = ut->b;         // Length of original utf-8
     948           0 :     int32_t  ix= (int32_t)index;     // Requested index, trimmed to 32 bits.
     949           0 :     int32_t  mapIndex = 0;
     950           0 :     if (index<0) {
     951           0 :         ix=0;
     952           0 :     } else if (index > 0x7fffffff) {
     953             :         // Strings with 64 bit lengths not supported by this UTF-8 provider.
     954           0 :         ix = 0x7fffffff;
     955             :     }
     956             : 
     957             :     // Pin requested index to the string length.
     958           0 :     if (ix>length) {
     959           0 :         if (length>=0) {
     960           0 :             ix=length;
     961           0 :         } else if (ix>=ut->c) {
     962             :             // Zero terminated string, and requested index is beyond
     963             :             //   the region that has already been scanned.
     964             :             //   Scan up to either the end of the string or to the
     965             :             //   requested position, whichever comes first.
     966           0 :             while (ut->c<ix && s8[ut->c]!=0) {
     967           0 :                 ut->c++;
     968             :             }
     969             :             //  TODO:  support for null terminated string length > 32 bits.
     970           0 :             if (s8[ut->c] == 0) {
     971             :                 // We just found the actual length of the string.
     972             :                 //  Trim the requested index back to that.
     973           0 :                 ix     = ut->c;
     974           0 :                 ut->b  = ut->c;
     975           0 :                 length = ut->c;
     976           0 :                 ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
     977             :             }
     978             :         }
     979             :     }
     980             : 
     981             :     //
     982             :     // Dispatch to the appropriate action for a forward iteration request.
     983             :     //
     984           0 :     if (forward) {
     985           0 :         if (ix==ut->chunkNativeLimit) {
     986             :             // Check for normal sequential iteration cases first.
     987           0 :             if (ix==length) {
     988             :                 // Just reached end of string
     989             :                 // Don't swap buffers, but do set the
     990             :                 //   current buffer position.
     991           0 :                 ut->chunkOffset = ut->chunkLength;
     992           0 :                 return FALSE;
     993             :             } else {
     994             :                 // End of current buffer.
     995             :                 //   check whether other buffer already has what we need.
     996           0 :                 UTF8Buf *altB = (UTF8Buf *)ut->q;
     997           0 :                 if (ix>=altB->bufNativeStart && ix<altB->bufNativeLimit) {
     998           0 :                     goto swapBuffers;
     999             :                 }
    1000             :             }
    1001             :         }
    1002             : 
    1003             :         // A random access.  Desired index could be in either or neither buf.
    1004             :         // For optimizing the order of testing, first check for the index
    1005             :         //    being in the other buffer.  This will be the case for uses that
    1006             :         //    move back and forth over a fairly limited range
    1007             :         {
    1008           0 :             u8b = (UTF8Buf *)ut->q;   // the alternate buffer
    1009           0 :             if (ix>=u8b->bufNativeStart && ix<u8b->bufNativeLimit) {
    1010             :                 // Requested index is in the other buffer.
    1011           0 :                 goto swapBuffers;
    1012             :             }
    1013           0 :             if (ix == length) {
    1014             :                 // Requested index is end-of-string.
    1015             :                 //   (this is the case of randomly seeking to the end.
    1016             :                 //    The case of iterating off the end is handled earlier.)
    1017           0 :                 if (ix == ut->chunkNativeLimit) {
    1018             :                     // Current buffer extends up to the end of the string.
    1019             :                     //   Leave it as the current buffer.
    1020           0 :                     ut->chunkOffset = ut->chunkLength;
    1021           0 :                     return FALSE;
    1022             :                 }
    1023           0 :                 if (ix == u8b->bufNativeLimit) {
    1024             :                     // Alternate buffer extends to the end of string.
    1025             :                     //   Swap it in as the current buffer.
    1026           0 :                     goto swapBuffersAndFail;
    1027             :                 }
    1028             : 
    1029             :                 // Neither existing buffer extends to the end of the string.
    1030           0 :                 goto makeStubBuffer;
    1031             :             }
    1032             : 
    1033           0 :             if (ix<ut->chunkNativeStart || ix>=ut->chunkNativeLimit) {
    1034             :                 // Requested index is in neither buffer.
    1035             :                 goto fillForward;
    1036             :             }
    1037             : 
    1038             :             // Requested index is in this buffer.
    1039           0 :             u8b = (UTF8Buf *)ut->p;   // the current buffer
    1040           0 :             mapIndex = ix - u8b->toUCharsMapStart;
    1041           0 :             U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
    1042           0 :             ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
    1043           0 :             return TRUE;
    1044             : 
    1045             :         }
    1046             :     }
    1047             : 
    1048             : 
    1049             :     //
    1050             :     // Dispatch to the appropriate action for a
    1051             :     //   Backwards Direction iteration request.
    1052             :     //
    1053           0 :     if (ix==ut->chunkNativeStart) {
    1054             :         // Check for normal sequential iteration cases first.
    1055           0 :         if (ix==0) {
    1056             :             // Just reached the start of string
    1057             :             // Don't swap buffers, but do set the
    1058             :             //   current buffer position.
    1059           0 :             ut->chunkOffset = 0;
    1060           0 :             return FALSE;
    1061             :         } else {
    1062             :             // Start of current buffer.
    1063             :             //   check whether other buffer already has what we need.
    1064           0 :             UTF8Buf *altB = (UTF8Buf *)ut->q;
    1065           0 :             if (ix>altB->bufNativeStart && ix<=altB->bufNativeLimit) {
    1066           0 :                 goto swapBuffers;
    1067             :             }
    1068             :         }
    1069             :     }
    1070             : 
    1071             :     // A random access.  Desired index could be in either or neither buf.
    1072             :     // For optimizing the order of testing,
    1073             :     //    Most likely case:  in the other buffer.
    1074             :     //    Second most likely: in neither buffer.
    1075             :     //    Unlikely, but must work:  in the current buffer.
    1076           0 :     u8b = (UTF8Buf *)ut->q;   // the alternate buffer
    1077           0 :     if (ix>u8b->bufNativeStart && ix<=u8b->bufNativeLimit) {
    1078             :         // Requested index is in the other buffer.
    1079           0 :         goto swapBuffers;
    1080             :     }
    1081             :     // Requested index is start-of-string.
    1082             :     //   (this is the case of randomly seeking to the start.
    1083             :     //    The case of iterating off the start is handled earlier.)
    1084           0 :     if (ix==0) {
    1085           0 :         if (u8b->bufNativeStart==0) {
    1086             :             // Alternate buffer contains the data for the start string.
    1087             :             // Make it be the current buffer.
    1088           0 :             goto swapBuffersAndFail;
    1089             :         } else {
    1090             :             // Request for data before the start of string,
    1091             :             //   neither buffer is usable.
    1092             :             //   set up a zero-length buffer.
    1093           0 :             goto makeStubBuffer;
    1094             :         }
    1095             :     }
    1096             : 
    1097           0 :     if (ix<=ut->chunkNativeStart || ix>ut->chunkNativeLimit) {
    1098             :         // Requested index is in neither buffer.
    1099             :         goto fillReverse;
    1100             :     }
    1101             : 
    1102             :     // Requested index is in this buffer.
    1103             :     //   Set the utf16 buffer index.
    1104           0 :     u8b = (UTF8Buf *)ut->p;
    1105           0 :     mapIndex = ix - u8b->toUCharsMapStart;
    1106           0 :     ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
    1107           0 :     if (ut->chunkOffset==0) {
    1108             :         // This occurs when the first character in the text is
    1109             :         //   a multi-byte UTF-8 char, and the requested index is to
    1110             :         //   one of the trailing bytes.  Because there is no preceding
    1111             :         //   character, this access fails.  We can't pick up on the
    1112             :         //   situation sooner because the requested index is not zero.
    1113           0 :         return FALSE;
    1114             :     } else {
    1115           0 :         return TRUE;
    1116             :     }
    1117             : 
    1118             : 
    1119             : 
    1120             : swapBuffers:
    1121             :     //  The alternate buffer (ut->q) has the string data that was requested.
    1122             :     //  Swap the primary and alternate buffers, and set the
    1123             :     //   chunk index into the new primary buffer.
    1124             :     {
    1125           0 :         u8b   = (UTF8Buf *)ut->q;
    1126           0 :         ut->q = ut->p;
    1127           0 :         ut->p = u8b;
    1128           0 :         ut->chunkContents       = &u8b->buf[u8b->bufStartIdx];
    1129           0 :         ut->chunkLength         = u8b->bufLimitIdx - u8b->bufStartIdx;
    1130           0 :         ut->chunkNativeStart    = u8b->bufNativeStart;
    1131           0 :         ut->chunkNativeLimit    = u8b->bufNativeLimit;
    1132           0 :         ut->nativeIndexingLimit = u8b->bufNILimit;
    1133             : 
    1134             :         // Index into the (now current) chunk
    1135             :         // Use the map to set the chunk index.  It's more trouble than it's worth
    1136             :         //    to check whether native indexing can be used.
    1137           0 :         U_ASSERT(ix>=u8b->bufNativeStart);
    1138           0 :         U_ASSERT(ix<=u8b->bufNativeLimit);
    1139           0 :         mapIndex = ix - u8b->toUCharsMapStart;
    1140           0 :         U_ASSERT(mapIndex>=0);
    1141           0 :         U_ASSERT(mapIndex<(int32_t)sizeof(u8b->mapToUChars));
    1142           0 :         ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
    1143             : 
    1144           0 :         return TRUE;
    1145             :     }
    1146             : 
    1147             : 
    1148             :  swapBuffersAndFail:
    1149             :     // We got a request for either the start or end of the string,
    1150             :     //  with iteration continuing in the out-of-bounds direction.
    1151             :     // The alternate buffer already contains the data up to the
    1152             :     //  start/end.
    1153             :     // Swap the buffers, then return failure, indicating that we couldn't
    1154             :     //  make things correct for continuing the iteration in the requested
    1155             :     //  direction.  The position & buffer are correct should the
    1156             :     //  user decide to iterate in the opposite direction.
    1157           0 :     u8b   = (UTF8Buf *)ut->q;
    1158           0 :     ut->q = ut->p;
    1159           0 :     ut->p = u8b;
    1160           0 :     ut->chunkContents       = &u8b->buf[u8b->bufStartIdx];
    1161           0 :     ut->chunkLength         = u8b->bufLimitIdx - u8b->bufStartIdx;
    1162           0 :     ut->chunkNativeStart    = u8b->bufNativeStart;
    1163           0 :     ut->chunkNativeLimit    = u8b->bufNativeLimit;
    1164           0 :     ut->nativeIndexingLimit = u8b->bufNILimit;
    1165             : 
    1166             :     // Index into the (now current) chunk
    1167             :     //  For this function  (swapBuffersAndFail), the requested index
    1168             :     //    will always be at either the start or end of the chunk.
    1169           0 :     if (ix==u8b->bufNativeLimit) {
    1170           0 :         ut->chunkOffset = ut->chunkLength;
    1171             :     } else  {
    1172           0 :         ut->chunkOffset = 0;
    1173           0 :         U_ASSERT(ix == u8b->bufNativeStart);
    1174             :     }
    1175           0 :     return FALSE;
    1176             : 
    1177             : makeStubBuffer:
    1178             :     //   The user has done a seek/access past the start or end
    1179             :     //   of the string.  Rather than loading data that is likely
    1180             :     //   to never be used, just set up a zero-length buffer at
    1181             :     //   the position.
    1182           0 :     u8b = (UTF8Buf *)ut->q;
    1183           0 :     u8b->bufNativeStart   = ix;
    1184           0 :     u8b->bufNativeLimit   = ix;
    1185           0 :     u8b->bufStartIdx      = 0;
    1186           0 :     u8b->bufLimitIdx      = 0;
    1187           0 :     u8b->bufNILimit       = 0;
    1188           0 :     u8b->toUCharsMapStart = ix;
    1189           0 :     u8b->mapToNative[0]   = 0;
    1190           0 :     u8b->mapToUChars[0]   = 0;
    1191           0 :     goto swapBuffersAndFail;
    1192             : 
    1193             : 
    1194             : 
    1195             : fillForward:
    1196             :     {
    1197             :         // Move the incoming index to a code point boundary.
    1198           0 :         U8_SET_CP_START(s8, 0, ix);
    1199             : 
    1200             :         // Swap the UText buffers.
    1201             :         //  We want to fill what was previously the alternate buffer,
    1202             :         //  and make what was the current buffer be the new alternate.
    1203           0 :         UTF8Buf *u8b = (UTF8Buf *)ut->q;
    1204           0 :         ut->q = ut->p;
    1205           0 :         ut->p = u8b;
    1206             : 
    1207           0 :         int32_t strLen = ut->b;
    1208           0 :         UBool   nulTerminated = FALSE;
    1209           0 :         if (strLen < 0) {
    1210           0 :             strLen = 0x7fffffff;
    1211           0 :             nulTerminated = TRUE;
    1212             :         }
    1213             : 
    1214           0 :         UChar   *buf = u8b->buf;
    1215           0 :         uint8_t *mapToNative  = u8b->mapToNative;
    1216           0 :         uint8_t *mapToUChars  = u8b->mapToUChars;
    1217           0 :         int32_t  destIx       = 0;
    1218           0 :         int32_t  srcIx        = ix;
    1219           0 :         UBool    seenNonAscii = FALSE;
    1220           0 :         UChar32  c = 0;
    1221             : 
    1222             :         // Fill the chunk buffer and mapping arrays.
    1223           0 :         while (destIx<UTF8_TEXT_CHUNK_SIZE) {
    1224           0 :             c = s8[srcIx];
    1225           0 :             if (c>0 && c<0x80) {
    1226             :                 // Special case ASCII range for speed.
    1227             :                 //   zero is excluded to simplify bounds checking.
    1228           0 :                 buf[destIx] = (UChar)c;
    1229           0 :                 mapToNative[destIx]    = (uint8_t)(srcIx - ix);
    1230           0 :                 mapToUChars[srcIx-ix]  = (uint8_t)destIx;
    1231           0 :                 srcIx++;
    1232           0 :                 destIx++;
    1233             :             } else {
    1234             :                 // General case, handle everything.
    1235           0 :                 if (seenNonAscii == FALSE) {
    1236           0 :                     seenNonAscii = TRUE;
    1237           0 :                     u8b->bufNILimit = destIx;
    1238             :                 }
    1239             : 
    1240           0 :                 int32_t  cIx      = srcIx;
    1241           0 :                 int32_t  dIx      = destIx;
    1242           0 :                 int32_t  dIxSaved = destIx;
    1243           0 :                 U8_NEXT_OR_FFFD(s8, srcIx, strLen, c);
    1244           0 :                 if (c==0 && nulTerminated) {
    1245           0 :                     srcIx--;
    1246           0 :                     break;
    1247             :                 }
    1248             : 
    1249           0 :                 U16_APPEND_UNSAFE(buf, destIx, c);
    1250           0 :                 do {
    1251           0 :                     mapToNative[dIx++] = (uint8_t)(cIx - ix);
    1252           0 :                 } while (dIx < destIx);
    1253             : 
    1254           0 :                 do {
    1255           0 :                     mapToUChars[cIx++ - ix] = (uint8_t)dIxSaved;
    1256           0 :                 } while (cIx < srcIx);
    1257             :             }
    1258           0 :             if (srcIx>=strLen) {
    1259           0 :                 break;
    1260             :             }
    1261             : 
    1262             :         }
    1263             : 
    1264             :         //  store Native <--> Chunk Map entries for the end of the buffer.
    1265             :         //    There is no actual character here, but the index position is valid.
    1266           0 :         mapToNative[destIx]     = (uint8_t)(srcIx - ix);
    1267           0 :         mapToUChars[srcIx - ix] = (uint8_t)destIx;
    1268             : 
    1269             :         //  fill in Buffer descriptor
    1270           0 :         u8b->bufNativeStart     = ix;
    1271           0 :         u8b->bufNativeLimit     = srcIx;
    1272           0 :         u8b->bufStartIdx        = 0;
    1273           0 :         u8b->bufLimitIdx        = destIx;
    1274           0 :         if (seenNonAscii == FALSE) {
    1275           0 :             u8b->bufNILimit     = destIx;
    1276             :         }
    1277           0 :         u8b->toUCharsMapStart   = u8b->bufNativeStart;
    1278             : 
    1279             :         // Set UText chunk to refer to this buffer.
    1280           0 :         ut->chunkContents       = buf;
    1281           0 :         ut->chunkOffset         = 0;
    1282           0 :         ut->chunkLength         = u8b->bufLimitIdx;
    1283           0 :         ut->chunkNativeStart    = u8b->bufNativeStart;
    1284           0 :         ut->chunkNativeLimit    = u8b->bufNativeLimit;
    1285           0 :         ut->nativeIndexingLimit = u8b->bufNILimit;
    1286             : 
    1287             :         // For zero terminated strings, keep track of the maximum point
    1288             :         //   scanned so far.
    1289           0 :         if (nulTerminated && srcIx>ut->c) {
    1290           0 :             ut->c = srcIx;
    1291           0 :             if (c==0) {
    1292             :                 // We scanned to the end.
    1293             :                 //   Remember the actual length.
    1294           0 :                 ut->b = srcIx;
    1295           0 :                 ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
    1296             :             }
    1297             :         }
    1298           0 :         return TRUE;
    1299             :     }
    1300             : 
    1301             : 
    1302             : fillReverse:
    1303             :     {
    1304             :         // Move the incoming index to a code point boundary.
    1305             :         // Can only do this if the incoming index is somewhere in the interior of the string.
    1306             :         //   If index is at the end, there is no character there to look at.
    1307           0 :         if (ix != ut->b) {
    1308             :             // Note: this function will only move the index back if it is on a trail byte
    1309             :             //       and there is a preceding lead byte and the sequence from the lead 
    1310             :             //       through this trail could be part of a valid UTF-8 sequence
    1311             :             //       Otherwise the index remains unchanged.
    1312           0 :             U8_SET_CP_START(s8, 0, ix);
    1313             :         }
    1314             : 
    1315             :         // Swap the UText buffers.
    1316             :         //  We want to fill what was previously the alternate buffer,
    1317             :         //  and make what was the current buffer be the new alternate.
    1318           0 :         UTF8Buf *u8b = (UTF8Buf *)ut->q;
    1319           0 :         ut->q = ut->p;
    1320           0 :         ut->p = u8b;
    1321             : 
    1322           0 :         UChar   *buf = u8b->buf;
    1323           0 :         uint8_t *mapToNative = u8b->mapToNative;
    1324           0 :         uint8_t *mapToUChars = u8b->mapToUChars;
    1325           0 :         int32_t  toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + 1;
    1326             :         // Note that toUCharsMapStart can be negative. Happens when the remaining
    1327             :         // text from current position to the beginning is less than the buffer size.
    1328             :         // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.
    1329           0 :         int32_t  destIx = UTF8_TEXT_CHUNK_SIZE+2;   // Start in the overflow region
    1330             :                                                     //   at end of buffer to leave room
    1331             :                                                     //   for a surrogate pair at the
    1332             :                                                     //   buffer start.
    1333           0 :         int32_t  srcIx  = ix;
    1334           0 :         int32_t  bufNILimit = destIx;
    1335             :         UChar32   c;
    1336             : 
    1337             :         // Map to/from Native Indexes, fill in for the position at the end of
    1338             :         //   the buffer.
    1339             :         //
    1340           0 :         mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
    1341           0 :         mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
    1342             : 
    1343             :         // Fill the chunk buffer
    1344             :         // Work backwards, filling from the end of the buffer towards the front.
    1345             :         //
    1346           0 :         while (destIx>2 && (srcIx - toUCharsMapStart > 5) && (srcIx > 0)) {
    1347           0 :             srcIx--;
    1348           0 :             destIx--;
    1349             : 
    1350             :             // Get last byte of the UTF-8 character
    1351           0 :             c = s8[srcIx];
    1352           0 :             if (c<0x80) {
    1353             :                 // Special case ASCII range for speed.
    1354           0 :                 buf[destIx] = (UChar)c;
    1355           0 :                 U_ASSERT(toUCharsMapStart <= srcIx);
    1356           0 :                 mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
    1357           0 :                 mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
    1358             :             } else {
    1359             :                 // General case, handle everything non-ASCII.
    1360             : 
    1361           0 :                 int32_t  sIx      = srcIx;  // ix of last byte of multi-byte u8 char
    1362             : 
    1363             :                 // Get the full character from the UTF8 string.
    1364             :                 //   use code derived from the macros in utf8.h
    1365             :                 //   Leaves srcIx pointing at the first byte of the UTF-8 char.
    1366             :                 //
    1367           0 :                 c=utf8_prevCharSafeBody(s8, 0, &srcIx, c, -3);
    1368             :                 // leaves srcIx at first byte of the multi-byte char.
    1369             : 
    1370             :                 // Store the character in UTF-16 buffer.
    1371           0 :                 if (c<0x10000) {
    1372           0 :                     buf[destIx] = (UChar)c;
    1373           0 :                     mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
    1374             :                 } else {
    1375           0 :                     buf[destIx]         = U16_TRAIL(c);
    1376           0 :                     mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
    1377           0 :                     buf[--destIx]       = U16_LEAD(c);
    1378           0 :                     mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
    1379             :                 }
    1380             : 
    1381             :                 // Fill in the map from native indexes to UChars buf index.
    1382           0 :                 do {
    1383           0 :                     mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;
    1384           0 :                 } while (sIx >= srcIx);
    1385           0 :                 U_ASSERT(toUCharsMapStart <= (srcIx+1));
    1386             : 
    1387             :                 // Set native indexing limit to be the current position.
    1388             :                 //   We are processing a non-ascii, non-native-indexing char now;
    1389             :                 //     the limit will be here if the rest of the chars to be
    1390             :                 //     added to this buffer are ascii.
    1391           0 :                 bufNILimit = destIx;
    1392             :             }
    1393             :         }
    1394           0 :         u8b->bufNativeStart     = srcIx;
    1395           0 :         u8b->bufNativeLimit     = ix;
    1396           0 :         u8b->bufStartIdx        = destIx;
    1397           0 :         u8b->bufLimitIdx        = UTF8_TEXT_CHUNK_SIZE+2;
    1398           0 :         u8b->bufNILimit         = bufNILimit - u8b->bufStartIdx;
    1399           0 :         u8b->toUCharsMapStart   = toUCharsMapStart;
    1400             : 
    1401           0 :         ut->chunkContents       = &buf[u8b->bufStartIdx];
    1402           0 :         ut->chunkLength         = u8b->bufLimitIdx - u8b->bufStartIdx;
    1403           0 :         ut->chunkOffset         = ut->chunkLength;
    1404           0 :         ut->chunkNativeStart    = u8b->bufNativeStart;
    1405           0 :         ut->chunkNativeLimit    = u8b->bufNativeLimit;
    1406           0 :         ut->nativeIndexingLimit = u8b->bufNILimit;
    1407           0 :         return TRUE;
    1408             :     }
    1409             : 
    1410             : }
    1411             : 
    1412             : 
    1413             : 
    1414             : //
    1415             : //  utext_strFromUTF8: convert srcLength bytes of UTF-8 at src to UTF-16 in dest.
    1416             : //     This is a slightly modified copy of u_strFromUTF8:
    1417             : //     inserts a Replacement Char rather than failing on invalid UTF-8, and removes unnecessary features.
    1418             : //     Returns dest; *pDestLength (if non-NULL) receives the full UTF-16 length, even when dest is too small.
    1419             : static UChar*
    1420           0 : utext_strFromUTF8(UChar *dest,
    1421             :               int32_t destCapacity,
    1422             :               int32_t *pDestLength,
    1423             :               const char* src,
    1424             :               int32_t srcLength,        // required.  NUL terminated not supported.
    1425             :               UErrorCode *pErrorCode
    1426             :               )
    1427             : {
    1428             : 
    1429           0 :     UChar *pDest = dest;
    1430           0 :     UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL;  // NULL dest: the copy loop below never runs
    1431           0 :     UChar32 ch=0;
    1432           0 :     int32_t index = 0;
    1433           0 :     int32_t reqLength = 0;   // UChars needed beyond those actually written to dest
    1434           0 :     uint8_t* pSrc = (uint8_t*) src;
    1435             : 
    1436             :     // Copy phase: convert until either the source or the destination is exhausted.
    1437           0 :     while((index < srcLength)&&(pDest<pDestLimit)){
    1438           0 :         ch = pSrc[index++];
    1439           0 :         if(ch <=0x7f){
    1440           0 :             *pDest++=(UChar)ch;
    1441             :         }else{
    1442           0 :             ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -3);
    1443           0 :             if(U_IS_BMP(ch)){
    1444           0 :                 *(pDest++)=(UChar)ch;
    1445             :             }else{
    1446           0 :                 *(pDest++)=U16_LEAD(ch);
    1447           0 :                 if(pDest<pDestLimit){
    1448           0 :                     *(pDest++)=U16_TRAIL(ch);
    1449             :                 }else{
    1450           0 :                     reqLength++;   // no room for the trail surrogate: count it and stop copying
    1451           0 :                     break;
    1452             :                 }
    1453             :             }
    1454             :         }
    1455             :     }
    1456             :     /* do not fill the dest buffer, just count the UChars still needed */
    1457           0 :     while(index < srcLength){
    1458           0 :         ch = pSrc[index++];
    1459           0 :         if(ch <= 0x7f){
    1460           0 :             reqLength++;
    1461             :         }else{
    1462           0 :             ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -3);
    1463           0 :             reqLength+=U16_LENGTH(ch);
    1464             :         }
    1465             :     }
    1466             : 
    1467           0 :     reqLength+=(int32_t)(pDest - dest);   // add the UChars that were actually written
    1468             : 
    1469           0 :     if(pDestLength){
    1470           0 :         *pDestLength = reqLength;
    1471             :     }
    1472             : 
    1473             :     /* Terminate the buffer */
    1474           0 :     u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
    1475             : 
    1476           0 :     return dest;
    1477             : }
    1478             : 
    1479             : // utf8TextExtract: the extract entry of utf8Funcs.  Converts the native (UTF-8) range start..limit to UTF-16 in dest.
    1480             : // Returns the UTF-16 length reported by utext_strFromUTF8, or 0 after setting *pErrorCode for bad arguments.
    1481             : static int32_t U_CALLCONV
    1482           0 : utf8TextExtract(UText *ut,
    1483             :                 int64_t start, int64_t limit,
    1484             :                 UChar *dest, int32_t destCapacity,
    1485             :                 UErrorCode *pErrorCode) {
    1486           0 :     if(U_FAILURE(*pErrorCode)) {
    1487           0 :         return 0;
    1488             :     }
    1489           0 :     if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
    1490           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1491           0 :         return 0;
    1492             :     }
    1493           0 :     int32_t  length  = ut->b;
    1494           0 :     int32_t  start32 = pinIndex(start, length);
    1495           0 :     int32_t  limit32 = pinIndex(limit, length);
    1496             :     // NOTE(review): ut->b stays negative until the length of a NUL-terminated string is known (see utext_openUTF8); confirm pinIndex copes with that.
    1497           0 :     if(start32>limit32) {
    1498           0 :         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    1499           0 :         return 0;
    1500             :     }
    1501             : 
    1502             : 
    1503             :     // adjust the incoming indexes to land on code point boundaries if needed.
    1504             :     //    adjust by no more than three, because that is the largest number of trail bytes
    1505             :     //    in a well formed UTF8 character.
    1506           0 :     const uint8_t *buf = (const uint8_t *)ut->context;
    1507             :     int i;
    1508           0 :     if (start32 < ut->chunkNativeLimit) {
    1509           0 :         for (i=0; i<3; i++) {
    1510           0 :             if (U8_IS_SINGLE(buf[start32]) || U8_IS_LEAD(buf[start32]) || start32==0) {
    1511             :                 break;
    1512             :             }
    1513           0 :             start32--;   // on a trail byte: step back towards the lead byte
    1514             :         }
    1515             :     }
    1516             : 
    1517           0 :     if (limit32 < ut->chunkNativeLimit) {
    1518           0 :         for (i=0; i<3; i++) {
    1519           0 :             if (U8_IS_SINGLE(buf[limit32]) || U8_IS_LEAD(buf[limit32]) || limit32==0) {
    1520             :                 break;
    1521             :             }
    1522           0 :             limit32--;   // same backwards adjustment for the end of the range
    1523             :         }
    1524             :     }
    1525             : 
    1526             :     // Do the actual extract.
    1527           0 :     int32_t destLength=0;
    1528           0 :     utext_strFromUTF8(dest, destCapacity, &destLength,
    1529           0 :                     (const char *)ut->context+start32, limit32-start32,
    1530           0 :                     pErrorCode);
    1531           0 :     utf8TextAccess(ut, limit32, TRUE);   // finish by accessing the text at the (adjusted) limit
    1532           0 :     return destLength;
    1533             : }
    1534             : 
    1535             : //
    1536             : // utf8TextMapOffsetToNative
    1537             : //
    1538             : // Map the current chunk (UTF-16) offset, ut->chunkOffset, to a native index.
    1539             : static int64_t U_CALLCONV
    1540           0 : utf8TextMapOffsetToNative(const UText *ut) {
    1541             :     // mapToNative is indexed by position in the whole buf (hence + bufStartIdx); its entries are relative to toUCharsMapStart.
    1542           0 :     UTF8Buf *u8b = (UTF8Buf *)ut->p;
    1543           0 :     U_ASSERT(ut->chunkOffset>ut->nativeIndexingLimit && ut->chunkOffset<=ut->chunkLength);
    1544           0 :     int32_t nativeOffset = u8b->mapToNative[ut->chunkOffset + u8b->bufStartIdx] + u8b->toUCharsMapStart;
    1545           0 :     U_ASSERT(nativeOffset >= ut->chunkNativeStart && nativeOffset <= ut->chunkNativeLimit);
    1546           0 :     return nativeOffset;
    1547             : }
    1548             : 
    1549             : // utf8TextMapIndexToUTF16
    1550             : // Map a native index to the corresponding chunk offset
    1551             : //   The index is looked up in the mapToUChars table of the current buffer (ut->p).
    1552             : static int32_t U_CALLCONV
    1553           0 : utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) {
    1554           0 :     U_ASSERT(index64 <= 0x7fffffff);
    1555           0 :     int32_t index = (int32_t)index64;
    1556           0 :     UTF8Buf *u8b = (UTF8Buf *)ut->p;
    1557           0 :     U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit);
    1558           0 :     U_ASSERT(index<=ut->chunkNativeLimit);
    1559           0 :     int32_t mapIndex = index - u8b->toUCharsMapStart;   // table slots are relative to toUCharsMapStart
    1560           0 :     U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
    1561           0 :     int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;   // table holds buf indexes; make it relative to the chunk start
    1562           0 :     U_ASSERT(offset>=0 && offset<=ut->chunkLength);
    1563           0 :     return offset;
    1564             : }
    1565             : // utf8TextClone: the clone entry of utf8Funcs.  A deep clone gets its own heap copy of the UTF-8 string.
    1566             : static UText * U_CALLCONV
    1567           0 : utf8TextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)
    1568             : {
    1569             :     // First do a generic shallow clone.  Does everything needed for the UText struct itself.
    1570           0 :     dest = shallowTextClone(dest, src, status);
    1571             : 
    1572             :     // For deep clones, make a copy of the string.
    1573             :     //  The copied storage is owned by the newly created clone.
    1574             :     //
    1575             :     // TODO:  There is an issue with using utext_nativeLength().
    1576             :     //        That function is non-const in cases where the input was NUL terminated
    1577             :     //          and the length has not yet been determined.
    1578             :     //        This function (clone()) is const.
    1579             :     //        There is potentially a thread safety issue lurking here.
    1580             :     //
    1581           0 :     if (deep && U_SUCCESS(*status)) {
    1582           0 :         int32_t  len = (int32_t)utext_nativeLength((UText *)src);
    1583           0 :         char *copyStr = (char *)uprv_malloc((size_t)len+1);   // size the buffer in size_t so len+1 cannot overflow int32_t
    1584           0 :         if (copyStr == NULL) {
    1585           0 :             *status = U_MEMORY_ALLOCATION_ERROR;
    1586             :         } else {   // copy exactly len bytes: src->context[len] may lie outside a length-delimited source string
    1587           0 :             uprv_memcpy(copyStr, src->context, len); copyStr[len] = 0;   // write the terminator instead of reading it from the source
    1588           0 :             dest->context = copyStr;
    1589           0 :             dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
    1590             :         }
    1591             :     }
    1592           0 :     return dest;
    1593             : }
    1594             : 
    1595             : // utf8TextClose: the close entry of utf8Funcs.  Frees the UTF-8 string when this UText owns it.
    1596             : static void U_CALLCONV
    1597           0 : utf8TextClose(UText *ut) {
    1598             :     // Most of the work of close is done by the generic UText framework close.
    1599             :     // All that needs to be done here is to delete the UTF8 string if the UText
    1600             :     //  owns it.  This occurs if the UText was created by a deep clone (see utf8TextClone).
    1601           0 :     if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {
    1602           0 :         char *s = (char *)ut->context;
    1603           0 :         uprv_free(s);
    1604           0 :         ut->context = NULL;   // do not keep a pointer to the freed string
    1605             :     }
    1606           0 : }
    1607             : 
    1608             : U_CDECL_END
    1609             : 
    1610             : // Function table that utext_openUTF8 installs in ut->pFuncs; the replace and copy entries are NULL.
    1611             : static const struct UTextFuncs utf8Funcs =
    1612             : {
    1613             :     sizeof(UTextFuncs),
    1614             :     0, 0, 0,             // Reserved alignment padding
    1615             :     utf8TextClone,
    1616             :     utf8TextLength,
    1617             :     utf8TextAccess,
    1618             :     utf8TextExtract,
    1619             :     NULL,                /* replace*/
    1620             :     NULL,                /* copy   */
    1621             :     utf8TextMapOffsetToNative,
    1622             :     utf8TextMapIndexToUTF16,
    1623             :     utf8TextClose,
    1624             :     NULL,                // spare 1
    1625             :     NULL,                // spare 2
    1626             :     NULL                 // spare 3
    1627             : };
    1628             : 
    1629             : // Zero-length string that utext_openUTF8 substitutes when it is called with s==NULL and length==0.
    1630             : static const char gEmptyString[] = {0};
    1631             : // utext_openUTF8: set up ut (via utext_setup) as a UText over the UTF-8 string s.  A negative length (-1) means s is NUL-terminated.
    1632             : U_CAPI UText * U_EXPORT2
    1633           0 : utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status) {
    1634           0 :     if(U_FAILURE(*status)) {
    1635           0 :         return NULL;
    1636             :     }
    1637           0 :     if(s==NULL && length==0) {
    1638           0 :         s = gEmptyString;   // a NULL pointer is accepted only for an explicitly empty string
    1639             :     }
    1640             : 
    1641           0 :     if(s==NULL || length<-1 || length>INT32_MAX) {
    1642           0 :         *status=U_ILLEGAL_ARGUMENT_ERROR;
    1643           0 :         return NULL;
    1644             :     }
    1645             : 
    1646           0 :     ut = utext_setup(ut, sizeof(UTF8Buf) * 2, status);   // extra space for two UTF8Buf chunk buffers
    1647           0 :     if (U_FAILURE(*status)) {
    1648           0 :         return ut;
    1649             :     }
    1650             : 
    1651           0 :     ut->pFuncs  = &utf8Funcs;
    1652           0 :     ut->context = s;
    1653           0 :     ut->b       = (int32_t)length;   // string length in bytes; negative while still unknown
    1654           0 :     ut->c       = (int32_t)length;   // for NUL-terminated input: how far the string has been scanned so far
    1655           0 :     if (ut->c < 0) {
    1656           0 :         ut->c = 0;
    1657           0 :         ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
    1658             :     }
    1659           0 :     ut->p = ut->pExtra;                             // first UTF8Buf: the current buffer
    1660           0 :     ut->q = (char *)ut->pExtra + sizeof(UTF8Buf);   // second UTF8Buf: the alternate buffer
    1661           0 :     return ut;
    1662             : 
    1663             : }
    1664             : 
    1665             : 
    1666             : 
    1667             : 
    1668             : 
    1669             : 
    1670             : 
    1671             : 
    1672             : //------------------------------------------------------------------------------
    1673             : //
    1674             : //     UText implementation wrapper for Replaceable (read/write)
    1675             : //
    1676             : //         Use of UText data members:
    1677             : //            context    pointer to Replaceable.
    1678             : //            p          not used by the functions below; ownership of the
    1679             : //                       Replaceable is flagged by UTEXT_PROVIDER_OWNS_TEXT.
    1680             : //------------------------------------------------------------------------------
    1681             : 
    1682             : 
    1683             : 
    1684             : // Number of UChars requested from the Replaceable per chunk.
    1685             : // Must be at least 3, to allow for possible trimming at code point boundaries.
    1686             : enum { REP_TEXT_CHUNK_SIZE=10 };
    1687             : 
    1688             : struct ReplExtra {
    1689             :     /*
    1690             :      * Chunk UChars: the buffer that repTextAccess() fills from the Replaceable
    1691             :      * and exposes as UText.chunkContents.
    1692             :      * +1 to simplify filling with surrogate pair at the end.
    1693             :      */
    1694             :     UChar s[REP_TEXT_CHUNK_SIZE+1];
    1695             : };
    1695             : 
    1696             : 
    1697             : U_CDECL_BEGIN
    1698             : 
    1699             : static UText * U_CALLCONV   // Clone function for Replaceable-backed UTexts (shallow or deep).
    1700           0 : repTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
    1701             :     // First do a generic shallow clone.  Does everything needed for the UText struct itself.
    1702           0 :     dest = shallowTextClone(dest, src, status);
    1703             : 
    1704             :     // For deep clones, make a copy of the Replaceable.
    1705             :     //  The copied Replaceable storage is owned by the newly created UText clone.
    1706             :     //  The UTEXT_PROVIDER_OWNS_TEXT flag set below is the signal to repTextClose()
    1707             :     //    to delete it.
    1708             :     //  NOTE(review): the result of clone() is stored without a NULL check - confirm it cannot fail here.
    1709           0 :     if (deep && U_SUCCESS(*status)) {
    1710           0 :         const Replaceable *replSrc = (const Replaceable *)src->context;
    1711           0 :         dest->context = replSrc->clone();
    1712           0 :         dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
    1713             : 
    1714             :         // with deep clone, the copy is writable, even when the source is not.
    1715           0 :         dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
    1716             :     }
    1717           0 :     return dest;
    1718             : }
    1719             : 
    1720             : 
    1721             : static void U_CALLCONV   // Close function for Replaceable-backed UTexts.
    1722           0 : repTextClose(UText *ut) {
    1723             :     // Most of the work of close is done by the generic UText framework close.
    1724             :     // All that needs to be done here is delete the Replaceable if the UText
    1725             :     //  owns it (UTEXT_PROVIDER_OWNS_TEXT is set, as done by a deep clone).
    1726           0 :     if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {
    1727           0 :         Replaceable *rep = (Replaceable *)ut->context;
    1728           0 :         delete rep;
    1729           0 :         ut->context = NULL;   // do not leave a dangling pointer behind
    1730             :     }
    1731           0 : }
    1732             : 
    1733             : 
    1734             : static int64_t U_CALLCONV   // Length function: returns the Replaceable's length(), widened to 64 bits.
    1735           0 : repTextLength(UText *ut) {
    1736           0 :     const Replaceable *replSrc = (const Replaceable *)ut->context;
    1737           0 :     int32_t  len = replSrc->length();
    1738           0 :     return len;
    1739             : }
    1740             : 
    1741             : 
    1742             : static UBool U_CALLCONV   // Access function: position the chunk buffer so that it covers the requested index.
    1743           0 : repTextAccess(UText *ut, int64_t index, UBool forward) {   // forward: TRUE = text at/after index is wanted, FALSE = text before it
    1744           0 :     const Replaceable *rep=(const Replaceable *)ut->context;
    1745           0 :     int32_t length=rep->length();   // Full length of the input text (bigger than a chunk)
    1746             : 
    1747             :     // clip the requested index to the limits of the text.
    1748             :     // All chunk arithmetic below uses the clipped index32, never the raw index.
    1749           0 :     int32_t index32 = pinIndex(index, length);
    1750           0 :     U_ASSERT(index<=INT32_MAX);
    1751             : 
    1752             :     /*
    1753             :      * Compute start/limit boundaries around index, for a segment of text
    1754             :      * to be extracted.
    1755             :      * To allow for the possibility that our user gave an index to the trailing
    1756             :      * half of a surrogate pair, we must request one extra preceding UChar when
    1757             :      * going in the forward direction.  This will ensure that the buffer has the
    1758             :      * entire code point at the specified index.
    1759             :      */
    1760           0 :     if(forward) {
    1761             : 
    1762           0 :         if (index32>=ut->chunkNativeStart && index32<ut->chunkNativeLimit) {
    1763             :             // Buffer already contains the requested position.
    1764           0 :             ut->chunkOffset = (int32_t)(index32 - ut->chunkNativeStart);
    1765           0 :             return TRUE;
    1766             :         }
    1767           0 :         if (index32>=length && ut->chunkNativeLimit==length) {
    1768             :             // Request for end of string, and buffer already extends up to it.
    1769             :             // Can't get the data, but don't change the buffer.
    1770           0 :             ut->chunkOffset = length - (int32_t)ut->chunkNativeStart;
    1771           0 :             return FALSE;
    1772             :         }
    1773             : 
    1774           0 :         ut->chunkNativeLimit = (int64_t)index32 + REP_TEXT_CHUNK_SIZE - 1;
    1775             :         // Going forward, so we want to have the buffer with stuff at and beyond
    1776             :         //   the requested index.  The -1 gets us one code point before the
    1777             :         //   requested index also, to handle the case of the index being on
    1778             :         //   a trail surrogate of a surrogate pair.
    1779           0 :         if(ut->chunkNativeLimit > length) {
    1780           0 :             ut->chunkNativeLimit = length;
    1781             :         }
    1782             :         // unless buffer ran off end, start is index-1.
    1783           0 :         ut->chunkNativeStart = ut->chunkNativeLimit - REP_TEXT_CHUNK_SIZE;
    1784           0 :         if(ut->chunkNativeStart < 0) {
    1785           0 :             ut->chunkNativeStart = 0;
    1786             :         }
    1787             :     } else {
    1788             :         // Reverse iteration.  Fill buffer with data preceding the requested index.
    1789           0 :         if (index32>ut->chunkNativeStart && index32<=ut->chunkNativeLimit) {
    1790             :             // Requested position already in buffer.
    1791           0 :             ut->chunkOffset = index32 - (int32_t)ut->chunkNativeStart;
    1792           0 :             return TRUE;
    1793             :         }
    1794           0 :         if (index32==0 && ut->chunkNativeStart==0) {
    1795             :             // Request for start, buffer already begins at start.
    1796             :             //  No data, but keep the buffer as is.
    1797           0 :             ut->chunkOffset = 0;
    1798           0 :             return FALSE;
    1799             :         }
    1800             : 
    1801             :         // Figure out the bounds of the chunk to extract for reverse iteration.
    1802             :         // Need to worry about chunk not splitting surrogate pairs, and while still
    1803             :         // containing the data we need.
    1804             :         // Fix by requesting a chunk that includes an extra UChar at the end.
    1805             :         // If this turns out to be a lead surrogate, we can lop it off and still have
    1806             :         //   the data we wanted.
    1807           0 :         ut->chunkNativeStart = index32 + 1 - REP_TEXT_CHUNK_SIZE;
    1808           0 :         if (ut->chunkNativeStart < 0) {
    1809           0 :             ut->chunkNativeStart = 0;
    1810             :         }
    1811             : 
    1812           0 :         ut->chunkNativeLimit = index32 + 1;
    1813           0 :         if (ut->chunkNativeLimit > length) {
    1814           0 :             ut->chunkNativeLimit = length;
    1815             :         }
    1816             :     }
    1817             : 
    1818             :     // Extract the new chunk of text from the Replaceable source.
    1819           0 :     ReplExtra *ex = (ReplExtra *)ut->pExtra;
    1820             :     // UnicodeString with its buffer a writable alias to the chunk buffer
    1821           0 :     UnicodeString buffer(ex->s, 0 /*buffer length*/, REP_TEXT_CHUNK_SIZE /*buffer capacity*/);
    1822           0 :     rep->extractBetween((int32_t)ut->chunkNativeStart, (int32_t)ut->chunkNativeLimit, buffer);
    1823             : 
    1824           0 :     ut->chunkContents  = ex->s;
    1825           0 :     ut->chunkLength    = (int32_t)(ut->chunkNativeLimit - ut->chunkNativeStart);
    1826           0 :     ut->chunkOffset    = (int32_t)(index32 - ut->chunkNativeStart);
    1827             : 
    1828             :     // Surrogate pairs from the input text must not span chunk boundaries.
    1829             :     // If end of chunk could be the start of a surrogate, trim it off.
    1830           0 :     if (ut->chunkNativeLimit < length &&
    1831           0 :         U16_IS_LEAD(ex->s[ut->chunkLength-1])) {
    1832           0 :             ut->chunkLength--;
    1833           0 :             ut->chunkNativeLimit--;
    1834           0 :             if (ut->chunkOffset > ut->chunkLength) {
    1835           0 :                 ut->chunkOffset = ut->chunkLength;
    1836             :             }
    1837             :         }
    1838             : 
    1839             :     // if the first UChar in the chunk could be the trailing half of a surrogate pair,
    1840             :     // trim it off.
    1841           0 :     if(ut->chunkNativeStart>0 && U16_IS_TRAIL(ex->s[0])) {
    1842           0 :         ++(ut->chunkContents);
    1843           0 :         ++(ut->chunkNativeStart);
    1844           0 :         --(ut->chunkLength);
    1845           0 :         --(ut->chunkOffset);
    1846             :     }
    1847             : 
    1848             :     // adjust the index/chunkOffset to a code point boundary
    1849           0 :     U16_SET_CP_START(ut->chunkContents, 0, ut->chunkOffset);
    1850             : 
    1851             :     // Use fast indexing for get/setNativeIndex()
    1852           0 :     ut->nativeIndexingLimit = ut->chunkLength;
    1853             : 
    1854           0 :     return TRUE;
    1855             : }
    1856             : 
    1857             : 
    1858             : 
    1859             : static int32_t U_CALLCONV   // Extract function: copy native range [start, limit) of the Replaceable into dest.
    1860           0 : repTextExtract(UText *ut,
    1861             :                int64_t start, int64_t limit,          // requested range; pinned to the text length below
    1862             :                UChar *dest, int32_t destCapacity,     // output buffer; dest may be NULL only if destCapacity==0
    1863             :                UErrorCode *status) {                  // returns the length of the requested range, 0 on error
    1864           0 :     const Replaceable *rep=(const Replaceable *)ut->context;
    1865           0 :     int32_t  length=rep->length();
    1866             : 
    1867           0 :     if(U_FAILURE(*status)) {
    1868           0 :         return 0;
    1869             :     }
    1870           0 :     if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
    1871           0 :         *status=U_ILLEGAL_ARGUMENT_ERROR;
    1872           0 :         return 0;   // do not go on to alias an unusable destination buffer
    1873             :     }
    1874           0 :     if(start>limit) {
    1875           0 :         *status=U_INDEX_OUTOFBOUNDS_ERROR;
    1876           0 :         return 0;
    1877             :     }
    1878           0 :     int32_t  start32 = pinIndex(start, length);
    1879           0 :     int32_t  limit32 = pinIndex(limit, length);
    1880             : 
    1881             :     // adjust start, limit if they point to trail half of surrogates
    1882           0 :     if (start32<length && U16_IS_TRAIL(rep->charAt(start32)) &&
    1883           0 :         U_IS_SUPPLEMENTARY(rep->char32At(start32))){
    1884           0 :             start32--;
    1885             :     }
    1886           0 :     if (limit32<length && U16_IS_TRAIL(rep->charAt(limit32)) &&
    1887           0 :         U_IS_SUPPLEMENTARY(rep->char32At(limit32))){
    1888           0 :             limit32--;
    1889             :     }
    1890             : 
    1891           0 :     length=limit32-start32;   // from here on: length of the (adjusted) requested range
    1892           0 :     if(length>destCapacity) {
    1893           0 :         limit32 = start32 + destCapacity;   // copy only what fits; the full length is still returned
    1894             :     }
    1895           0 :     UnicodeString buffer(dest, 0, destCapacity); // writable alias
    1896           0 :     rep->extractBetween(start32, limit32, buffer);
    1897           0 :     repTextAccess(ut, limit32, TRUE);   // leave the iteration position at the end of the copied text
    1898             : 
    1899           0 :     return u_terminateUChars(dest, destCapacity, length, status);
    1900             : }
    1901             : 
    1902             : static int32_t U_CALLCONV   // Replace function: replace native range [start, limit) with src.
    1903           0 : repTextReplace(UText *ut,
    1904             :                int64_t start, int64_t limit,          // range to replace; pinned to the text length below
    1905             :                const UChar *src, int32_t length,      // replacement text; length<0 is passed on as "NUL-terminated"
    1906             :                UErrorCode *status) {                  // returns the change in text length, 0 on error
    1907           0 :     Replaceable *rep=(Replaceable *)ut->context;
    1908             :     int32_t oldLength;
    1909             : 
    1910           0 :     if(U_FAILURE(*status)) {
    1911           0 :         return 0;
    1912             :     }
    1913           0 :     if(src==NULL && length!=0) {
    1914           0 :         *status=U_ILLEGAL_ARGUMENT_ERROR;
    1915           0 :         return 0;
    1916             :     }
    1917           0 :     oldLength=rep->length(); // will subtract from new length
    1918           0 :     if(start>limit ) {
    1919           0 :         *status=U_INDEX_OUTOFBOUNDS_ERROR;
    1920           0 :         return 0;
    1921             :     }
    1922             : 
    1923           0 :     int32_t start32 = pinIndex(start, oldLength);
    1924           0 :     int32_t limit32 = pinIndex(limit, oldLength);
    1925             : 
    1926             :     // Snap start & limit to code point boundaries.
    1927           0 :     if (start32<oldLength && U16_IS_TRAIL(rep->charAt(start32)) &&
    1928           0 :         start32>0 && U16_IS_LEAD(rep->charAt(start32-1)))
    1929             :     {
    1930           0 :             start32--;
    1931             :     }
    1932             :     // limit32>0 keeps charAt(limit32-1) inside the text, mirroring the start32>0 guard above.
    1933           0 :     if (limit32>0 && limit32<oldLength && U16_IS_LEAD(rep->charAt(limit32-1)) &&
    1934           0 :         U16_IS_TRAIL(rep->charAt(limit32)))
    1935             :     {
    1936           0 :             limit32++;
    1937             :     }
    1938             :     // Do the actual replace operation using methods of the Replaceable class
    1939           0 :     UnicodeString replStr((UBool)(length<0), src, length); // read-only alias
    1940           0 :     rep->handleReplaceBetween(start32, limit32, replStr);
    1941           0 :     int32_t newLength = rep->length();
    1942           0 :     int32_t lengthDelta = newLength - oldLength;
    1943             : 
    1944             :     // Is the UText chunk buffer OK?
    1945           0 :     if (ut->chunkNativeLimit > start32) {
    1946             :         // this replace operation may have impacted the current chunk.
    1947             :         // invalidate it, which will force a reload on the next access.
    1948           0 :         invalidateChunk(ut);
    1949             :     }
    1950             : 
    1951             :     // set the iteration position to the end of the newly inserted replacement text.
    1952           0 :     int32_t newIndexPos = limit32 + lengthDelta;
    1953           0 :     repTextAccess(ut, newIndexPos, TRUE);
    1954             : 
    1955           0 :     return lengthDelta;
    1956             : }
    1957             : 
    1958             : 
    1959             : static void U_CALLCONV   // Copy function: copy or move native range [start, limit) to destIndex.
    1960           0 : repTextCopy(UText *ut,
    1961             :                 int64_t start, int64_t limit,   // source range; pinned to the text length below
    1962             :                 int64_t destIndex,              // insertion point; must not lie strictly inside the source range
    1963             :                 UBool move,                     // TRUE: remove the source range after copying it
    1964             :                 UErrorCode *status)
    1965             : {
    1966           0 :     Replaceable *rep=(Replaceable *)ut->context;
    1967           0 :     int32_t length=rep->length();
    1968             : 
    1969           0 :     if(U_FAILURE(*status)) {
    1970           0 :         return;
    1971             :     }
    1972           0 :     if (start>limit || (start<destIndex && destIndex<limit))
    1973             :     {
    1974           0 :         *status=U_INDEX_OUTOFBOUNDS_ERROR;
    1975           0 :         return;
    1976             :     }
    1977             : 
    1978           0 :     int32_t start32     = pinIndex(start, length);
    1979           0 :     int32_t limit32     = pinIndex(limit, length);
    1980           0 :     int32_t destIndex32 = pinIndex(destIndex, length);
    1981             : 
    1982             :     // TODO:  snap input parameters to code point boundaries.
    1983             : 
    1984           0 :     if(move) {
    1985             :         // move: copy to destIndex, then replace original with nothing
    1986           0 :         int32_t segLength=limit32-start32;
    1987           0 :         rep->copy(start32, limit32, destIndex32);
    1988           0 :         if(destIndex32<start32) {
    1989             :             // the copy was inserted ahead of the original, which shifted the original along
    1990           0 :             start32+=segLength;
    1991           0 :             limit32+=segLength;
    1992             :         }
    1993           0 :         rep->handleReplaceBetween(start32, limit32, UnicodeString());
    1994             :     } else {
    1995             :         // copy
    1996           0 :         rep->copy(start32, limit32, destIndex32);
    1997             :     }
    1998             : 
    1999             :     // If the change to the text touched the region in the chunk buffer,
    2000             :     //  invalidate the buffer.
    2001           0 :     int32_t firstAffectedIndex = destIndex32;
    2002           0 :     if (move && start32<firstAffectedIndex) {
    2003           0 :         firstAffectedIndex = start32;
    2004             :     }
    2005           0 :     if (firstAffectedIndex < ut->chunkNativeLimit) {
    2006             :         // changes may have affected range covered by the chunk
    2007           0 :         invalidateChunk(ut);
    2008             :     }
    2009             : 
    2010             :     // Put iteration position at the end of the newly inserted (moved) block.
    2011           0 :     int32_t  nativeIterIndex = destIndex32 + limit32 - start32;
    2012           0 :     if (move && destIndex32>start32) {
    2013             :         // moved a block of text towards the end of the string.
    2014           0 :         nativeIterIndex = destIndex32;
    2015             :     }
    2016             : 
    2017             :     // Set position, reload chunk if needed.
    2018           0 :     repTextAccess(ut, nativeIterIndex, TRUE);
    2019             : }
    2019             : 
    2020             : static const struct UTextFuncs repFuncs =   // function table installed by utext_openReplaceable()
    2021             : {
    2022             :     sizeof(UTextFuncs),
    2023             :     0, 0, 0,           // Reserved alignment padding
    2024             :     repTextClone,
    2025             :     repTextLength,
    2026             :     repTextAccess,
    2027             :     repTextExtract,
    2028             :     repTextReplace,
    2029             :     repTextCopy,
    2030             :     NULL,              // MapOffsetToNative: not provided
    2031             :     NULL,              // MapIndexToUTF16: not provided
    2032             :     repTextClose,
    2033             :     NULL,              // spare 1
    2034             :     NULL,              // spare 2
    2035             :     NULL               // spare 3
    2036             : };
    2037             : 
    2038             : 
    2039             : U_CAPI UText * U_EXPORT2   // Set up ut (via utext_setup) as a writable UText over rep; rep is referenced, not copied.
    2040           0 : utext_openReplaceable(UText *ut, Replaceable *rep, UErrorCode *status)
    2041             : {
    2042           0 :     if(U_FAILURE(*status)) {
    2043           0 :         return NULL;
    2044             :     }
    2045           0 :     if(rep==NULL) {
    2046           0 :         *status=U_ILLEGAL_ARGUMENT_ERROR;
    2047           0 :         return NULL;
    2048             :     }
    2049           0 :     ut = utext_setup(ut, sizeof(ReplExtra), status);   // extra storage holds the chunk buffer used by repTextAccess()
    2050           0 :     if(U_FAILURE(*status)) {
    2051           0 :         return ut;   // on failure, hand back whatever utext_setup() returned
    2052             :     }
    2053             : 
    2054           0 :     ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_WRITABLE);
    2055           0 :     if(rep->hasMetaData()) {
    2056           0 :         ut->providerProperties |=I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA);
    2057             :     }
    2058             : 
    2059           0 :     ut->pFuncs  = &repFuncs;
    2060           0 :     ut->context =  rep;
    2061           0 :     return ut;
    2062             : }
    2063             : 
    2064             : U_CDECL_END
    2065             : 
    2066             : 
    2067             : 
    2068             : 
    2069             : 
    2070             : 
    2071             : 
    2072             : 
    2073             : //------------------------------------------------------------------------------
    2074             : //
    2075             : //     UText implementation for UnicodeString (read/write)  and
    2076             : //                    for const UnicodeString (read only)
    2077             : //             (same implementation, only the flags are different)
    2078             : //
    2079             : //         Use of UText data members:
    2080             : //            context    pointer to UnicodeString
    2081             : //            p          not used by clone()/close() below: a string owned by the
    2082             : //                       UText is flagged by UTEXT_PROVIDER_OWNS_TEXT and is
    2083             : //                       deleted through context on close().
    2084             : //------------------------------------------------------------------------------
    2085             : 
    2086             : U_CDECL_BEGIN
    2087             : 
    2088             : 
    2089             : static UText * U_CALLCONV   // Clone function for UnicodeString-backed UTexts (shallow or deep).
    2090           0 : unistrTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
    2091             :     // First do a generic shallow clone.  Does everything needed for the UText struct itself.
    2092           0 :     dest = shallowTextClone(dest, src, status);
    2093             : 
    2094             :     // For deep clones, make a copy of the UnicodeString.
    2095             :     //  The copied UnicodeString storage is owned by the newly created UText clone.
    2096             :     //  The UTEXT_PROVIDER_OWNS_TEXT flag set below is the signal to unistrTextClose()
    2097             :     //    to delete the string.
    2098             :     //  NOTE(review): the result of new is stored without a NULL check - confirm allocation failure is handled elsewhere.
    2099           0 :     if (deep && U_SUCCESS(*status)) {
    2100           0 :         const UnicodeString *srcString = (const UnicodeString *)src->context;
    2101           0 :         dest->context = new UnicodeString(*srcString);
    2102           0 :         dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
    2103             : 
    2104             :         // with deep clone, the copy is writable, even when the source is not.
    2105           0 :         dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
    2106             :     }
    2107           0 :     return dest;
    2108             : }
    2109             : 
    2110             : static void U_CALLCONV   // Close function for UnicodeString-backed UTexts.
    2111           0 : unistrTextClose(UText *ut) {
    2112             :     // Most of the work of close is done by the generic UText framework close.
    2113             :     // All that needs to be done here is delete the UnicodeString if the UText
    2114             :     //  owns it (UTEXT_PROVIDER_OWNS_TEXT is set, as done by a deep clone).
    2115           0 :     if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {
    2116           0 :         UnicodeString *str = (UnicodeString *)ut->context;
    2117           0 :         delete str;
    2118           0 :         ut->context = NULL;   // do not leave a dangling pointer behind
    2119             :     }
    2120           0 : }
    2121             : 
    2122             : 
    2123             : static int64_t U_CALLCONV   // Length function: returns the UnicodeString's length().
    2124           0 : unistrTextLength(UText *t) {
    2125           0 :     return ((const UnicodeString *)t->context)->length();
    2126             : }
    2127             : 
    2128             : 
    2129             : static UBool U_CALLCONV   // Access function: only moves chunkOffset; the chunk itself is never reloaded here.
    2130           0 : unistrTextAccess(UText *ut, int64_t index, UBool  forward) {
    2131           0 :     int32_t length  = ut->chunkLength;   // presumably the whole string is the one chunk (unistrTextReplace sets chunkLength to the string length)
    2132           0 :     ut->chunkOffset = pinIndex(index, length);
    2133             : 
    2134             :     // Check whether request is at the start or end:
    2135             :     //   TRUE if there is text after index (forward) or before it (backward).
    2136           0 :     UBool retVal = (forward && index<length) || (!forward && index>0);
    2137           0 :     return retVal;
    2138             : }
    2138             : 
    2139             : 
    2140             : 
    2141             : static int32_t U_CALLCONV   // Extract function: copy native range [start, limit) of the UnicodeString into dest.
    2142           0 : unistrTextExtract(UText *t,
    2143             :                   int64_t start, int64_t limit,          // requested range; snapped to code point starts below
    2144             :                   UChar *dest, int32_t destCapacity,     // output buffer; dest may be NULL only if destCapacity==0
    2145             :                   UErrorCode *pErrorCode) {              // returns the length of the requested range, 0 on error
    2146           0 :     const UnicodeString *us=(const UnicodeString *)t->context;
    2147           0 :     int32_t length=us->length();
    2148             : 
    2149           0 :     if(U_FAILURE(*pErrorCode)) {
    2150           0 :         return 0;
    2151             :     }
    2152           0 :     if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
    2153           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    2154           0 :         return 0;   // report the error without touching the iteration position
    2155             :     }
    2156           0 :     if(start<0 || start>limit) {
    2157           0 :         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    2158           0 :         return 0;
    2159             :     }
    2160           0 :     int32_t start32 = start<length ? us->getChar32Start((int32_t)start) : length;
    2161           0 :     int32_t limit32 = limit<length ? us->getChar32Start((int32_t)limit) : length;
    2162             : 
    2163           0 :     length=limit32-start32;   // from here on: length of the (adjusted) requested range
    2164           0 :     if (destCapacity>0 && dest!=NULL) {
    2165           0 :         int32_t trimmedLength = length;
    2166           0 :         if(trimmedLength>destCapacity) {
    2167           0 :             trimmedLength=destCapacity;   // copy only what fits; the full length is still returned
    2168             :         }
    2169           0 :         us->extract(start32, trimmedLength, dest);
    2170           0 :         t->chunkOffset = start32+trimmedLength;   // position just past the text that was copied
    2171             :     } else {
    2172           0 :         t->chunkOffset = start32;                 // nothing copied (preflight)
    2173             :     }
    2174           0 :     u_terminateUChars(dest, destCapacity, length, pErrorCode);
    2175           0 :     return length;
    2176             : }
    2177             : 
    2178             : static int32_t U_CALLCONV   // Replace function: replace native range [start, limit) with src.
    2179           0 : unistrTextReplace(UText *ut,
    2180             :                   int64_t start, int64_t limit,          // range to replace; pinned and snapped to code point starts below
    2181             :                   const UChar *src, int32_t length,      // replacement text and its length
    2182             :                   UErrorCode *pErrorCode) {              // returns the change in string length, 0 on error
    2183           0 :     UnicodeString *us=(UnicodeString *)ut->context;
    2184             : 
    2185           0 :     if(U_FAILURE(*pErrorCode)) {
    2186           0 :         return 0;
    2187             :     }
    2188           0 :     if(src==NULL && length!=0) {
    2189           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    2190           0 :         return 0;   // leave the string unmodified when the arguments are invalid
    2191             :     }
    2192           0 :     if(start>limit) {
    2193           0 :         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    2194           0 :         return 0;
    2195             :     }
    2196           0 :     int32_t oldLength=us->length();
    2197           0 :     int32_t start32 = pinIndex(start, oldLength);
    2198           0 :     int32_t limit32 = pinIndex(limit, oldLength);
    2199           0 :     if (start32 < oldLength) {
    2200           0 :         start32 = us->getChar32Start(start32);
    2201             :     }
    2202           0 :     if (limit32 < oldLength) {
    2203           0 :         limit32 = us->getChar32Start(limit32);
    2204             :     }
    2205             : 
    2206             :     // replace
    2207           0 :     us->replace(start32, limit32-start32, src, length);
    2208           0 :     int32_t newLength = us->length();
    2209             : 
    2210             :     // Update the chunk description: the string buffer and length may both have changed.
    2211           0 :     ut->chunkContents    = us->getBuffer();
    2212           0 :     ut->chunkLength      = newLength;
    2213           0 :     ut->chunkNativeLimit = newLength;
    2214           0 :     ut->nativeIndexingLimit = newLength;
    2215             : 
    2216             :     // Set iteration position to the point just following the newly inserted text.
    2217           0 :     int32_t lengthDelta = newLength - oldLength;
    2218           0 :     ut->chunkOffset = limit32 + lengthDelta;
    2219             : 
    2220           0 :     return lengthDelta;
    2221             : }
    2222             : 
    2223             : static void U_CALLCONV   // Copy function: copy or move native range [start, limit) to destIndex.
    2224           0 : unistrTextCopy(UText *ut,
    2225             :                int64_t start, int64_t limit,   // source range; pinned to the string length below
    2226             :                int64_t destIndex,              // insertion point; must not lie strictly inside the source range
    2227             :                UBool move,                     // TRUE: remove the source range after copying it
    2228             :                UErrorCode *pErrorCode) {
    2229           0 :     UnicodeString *us=(UnicodeString *)ut->context;
    2230           0 :     int32_t length=us->length();
    2231             : 
    2232           0 :     if(U_FAILURE(*pErrorCode)) {
    2233           0 :         return;
    2234             :     }
    2235           0 :     int32_t start32 = pinIndex(start, length);
    2236           0 :     int32_t limit32 = pinIndex(limit, length);
    2237           0 :     int32_t destIndex32 = pinIndex(destIndex, length);
    2238             : 
    2239           0 :     if( start32>limit32 || (start32<destIndex32 && destIndex32<limit32)) {
    2240           0 :         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    2241           0 :         return;
    2242             :     }
    2243             : 
    2244           0 :     if(move) {
    2245             :         // move: copy to destIndex, then remove original
    2246           0 :         int32_t segLength=limit32-start32;
    2247           0 :         us->copy(start32, limit32, destIndex32);
    2248           0 :         if(destIndex32<start32) {
    2249             :             // The copy went in ahead of the original and shifted it along.  Shift both
    2250             :             // ends so that limit32-start32 still equals the segment length further down.
    2251           0 :             start32+=segLength;
    2252           0 :             limit32+=segLength;
    2253             :         }
    2254           0 :         us->remove(start32, segLength);
    2255             :     } else {
    2256             :         // copy
    2257           0 :         us->copy(start32, limit32, destIndex32);
    2258             :     }
    2259             :     // update chunk description, set iteration position.
    2260           0 :     ut->chunkContents = us->getBuffer();
    2261           0 :     if (move==FALSE) {
    2262             :         // copy operation, string length grows
    2263           0 :         ut->chunkLength += limit32-start32;
    2264           0 :         ut->chunkNativeLimit = ut->chunkLength;
    2265           0 :         ut->nativeIndexingLimit = ut->chunkLength;
    2266             :     }
    2267             :     // Iteration position to end of the newly inserted text.
    2268           0 :     ut->chunkOffset = destIndex32+limit32-start32;
    2269           0 :     if (move && destIndex32>start32) {
    2270           0 :         ut->chunkOffset = destIndex32;   // moved towards the end: the block now ends at destIndex
    2271             :     }
    2272             : }
    2273             : 
    2274             : static const struct UTextFuncs unistrFuncs =
    2275             : {
    2276             :     sizeof(UTextFuncs),
    2277             :     0, 0, 0,             // Reserved alignment padding
    2278             :     unistrTextClone,
    2279             :     unistrTextLength,
    2280             :     unistrTextAccess,
    2281             :     unistrTextExtract,
    2282             :     unistrTextReplace,
    2283             :     unistrTextCopy,
    2284             :     NULL,                // MapOffsetToNative,
    2285             :     NULL,                // MapIndexToUTF16,
    2286             :     unistrTextClose,
    2287             :     NULL,                // spare 1
    2288             :     NULL,                // spare 2
    2289             :     NULL                 // spare 3
    2290             : };
    2291             : 
    2292             : 
    2293             : 
    2294             : U_CDECL_END
    2295             : 
    2296             : 
    2297             : U_CAPI UText * U_EXPORT2
    2298           0 : utext_openUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) {
    2299           0 :     ut = utext_openConstUnicodeString(ut, s, status);
    2300           0 :     if (U_SUCCESS(*status)) {
    2301           0 :         ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
    2302             :     }
    2303           0 :     return ut;
    2304             : }
    2305             : 
    2306             : 
    2307             : 
    2308             : U_CAPI UText * U_EXPORT2
    2309           0 : utext_openConstUnicodeString(UText *ut, const UnicodeString *s, UErrorCode *status) {
    2310           0 :     if (U_SUCCESS(*status) && s->isBogus()) {
    2311             :         // The UnicodeString is bogus, but we still need to detach the UText
    2312             :         //   from whatever it was hooked to before, if anything.
    2313           0 :         utext_openUChars(ut, NULL, 0, status);
    2314           0 :         *status = U_ILLEGAL_ARGUMENT_ERROR;
    2315           0 :         return ut;
    2316             :     }
    2317           0 :     ut = utext_setup(ut, 0, status);
    2318             :     //    note:  use the standard (writable) function table for UnicodeString.
    2319             :     //           The flag settings disable writing, so having the functions in
    2320             :     //           the table is harmless.
    2321           0 :     if (U_SUCCESS(*status)) {
    2322           0 :         ut->pFuncs              = &unistrFuncs;
    2323           0 :         ut->context             = s;
    2324           0 :         ut->providerProperties  = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);
    2325           0 :         ut->chunkContents       = s->getBuffer();
    2326           0 :         ut->chunkLength         = s->length();
    2327           0 :         ut->chunkNativeStart    = 0;
    2328           0 :         ut->chunkNativeLimit    = ut->chunkLength;
    2329           0 :         ut->nativeIndexingLimit = ut->chunkLength;
    2330             :     }
    2331           0 :     return ut;
    2332             : }
    2333             : 
    2334             : //------------------------------------------------------------------------------
    2335             : //
    2336             : //     UText implementation for const UChar * strings
    2337             : //
    2338             : //         Use of UText data members:
    2339             : //            context    pointer to the UChar string
    2340             : //            a          length.  -1 if not yet known.
    2341             : //
    2342             : //         TODO:  support 64 bit lengths.
    2343             : //
    2344             : //------------------------------------------------------------------------------
    2345             : 
    2346             : U_CDECL_BEGIN
    2347             : 
    2348             : 
    2349             : static UText * U_CALLCONV
    2350           0 : ucstrTextClone(UText *dest, const UText * src, UBool deep, UErrorCode * status) {
    2351             :     // First do a generic shallow clone.
    2352           0 :     dest = shallowTextClone(dest, src, status);
    2353             : 
    2354             :     // For deep clones, make a copy of the string.
    2355             :     //  The copied storage is owned by the newly created clone.
    2356             :     //  A non-NULL pointer in UText.p is the signal to the close() function to delete
    2357             :     //    it.
    2358             :     //
    2359           0 :     if (deep && U_SUCCESS(*status)) {
    2360           0 :         U_ASSERT(utext_nativeLength(dest) < INT32_MAX);
    2361           0 :         int32_t  len = (int32_t)utext_nativeLength(dest);
    2362             : 
    2363             :         // The cloned string IS going to be NUL terminated, whether or not the original was.
    2364           0 :         const UChar *srcStr = (const UChar *)src->context;
    2365           0 :         UChar *copyStr = (UChar *)uprv_malloc((len+1) * sizeof(UChar));
    2366           0 :         if (copyStr == NULL) {
    2367           0 :             *status = U_MEMORY_ALLOCATION_ERROR;
    2368             :         } else {
    2369             :             int64_t i;
    2370           0 :             for (i=0; i<len; i++) {
    2371           0 :                 copyStr[i] = srcStr[i];
    2372             :             }
    2373           0 :             copyStr[len] = 0;
    2374           0 :             dest->context = copyStr;
    2375           0 :             dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
    2376             :         }
    2377             :     }
    2378           0 :     return dest;
    2379             : }
    2380             : 
    2381             : 
    2382             : static void U_CALLCONV
    2383           0 : ucstrTextClose(UText *ut) {
    2384             :     // Most of the work of close is done by the generic UText framework close.
    2385             :     // All that needs to be done here is delete the string if the UText
    2386             :     //  owns it.  This occurs if the UText was created by cloning.
    2387           0 :     if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {
    2388           0 :         UChar *s = (UChar *)ut->context;
    2389           0 :         uprv_free(s);
    2390           0 :         ut->context = NULL;
    2391             :     }
    2392           0 : }
    2393             : 
    2394             : 
    2395             : 
    2396             : static int64_t U_CALLCONV
    2397           0 : ucstrTextLength(UText *ut) {
    2398           0 :     if (ut->a < 0) {
    2399             :         // null terminated, we don't yet know the length.  Scan for it.
    2400             :         //    Access is not convenient for doing this
    2401             :         //    because the current interation postion can't be changed.
    2402           0 :         const UChar  *str = (const UChar *)ut->context;
    2403             :         for (;;) {
    2404           0 :             if (str[ut->chunkNativeLimit] == 0) {
    2405           0 :                 break;
    2406             :             }
    2407           0 :             ut->chunkNativeLimit++;
    2408             :         }
    2409           0 :         ut->a = ut->chunkNativeLimit;
    2410           0 :         ut->chunkLength = (int32_t)ut->chunkNativeLimit;
    2411           0 :         ut->nativeIndexingLimit = ut->chunkLength;
    2412           0 :         ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
    2413             :     }
    2414           0 :     return ut->a;
    2415             : }
    2416             : 
    2417             : 
    2418             : static UBool U_CALLCONV
    2419           0 : ucstrTextAccess(UText *ut, int64_t index, UBool  forward) {
    2420           0 :     const UChar *str   = (const UChar *)ut->context;
    2421             : 
    2422             :     // pin the requested index to the bounds of the string,
    2423             :     //  and set current iteration position.
    2424           0 :     if (index<0) {
    2425           0 :         index = 0;
    2426           0 :     } else if (index < ut->chunkNativeLimit) {
    2427             :         // The request data is within the chunk as it is known so far.
    2428             :         // Put index on a code point boundary.
    2429           0 :         U16_SET_CP_START(str, 0, index);
    2430           0 :     } else if (ut->a >= 0) {
    2431             :         // We know the length of this string, and the user is requesting something
    2432             :         // at or beyond the length.  Pin the requested index to the length.
    2433           0 :         index = ut->a;
    2434             :     } else {
    2435             :         // Null terminated string, length not yet known, and the requested index
    2436             :         //  is beyond where we have scanned so far.
    2437             :         //  Scan to 32 UChars beyond the requested index.  The strategy here is
    2438             :         //  to avoid fully scanning a long string when the caller only wants to
    2439             :         //  see a few characters at its beginning.
    2440           0 :         int32_t scanLimit = (int32_t)index + 32;
    2441           0 :         if ((index + 32)>INT32_MAX || (index + 32)<0 ) {   // note: int64 expression
    2442           0 :             scanLimit = INT32_MAX;
    2443             :         }
    2444             : 
    2445           0 :         int32_t chunkLimit = (int32_t)ut->chunkNativeLimit;
    2446           0 :         for (; chunkLimit<scanLimit; chunkLimit++) {
    2447           0 :             if (str[chunkLimit] == 0) {
    2448             :                 // We found the end of the string.  Remember it, pin the requested index to it,
    2449             :                 //  and bail out of here.
    2450           0 :                 ut->a = chunkLimit;
    2451           0 :                 ut->chunkLength = chunkLimit;
    2452           0 :                 ut->nativeIndexingLimit = chunkLimit;
    2453           0 :                 if (index >= chunkLimit) {
    2454           0 :                     index = chunkLimit;
    2455             :                 } else {
    2456           0 :                     U16_SET_CP_START(str, 0, index);
    2457             :                 }
    2458             : 
    2459           0 :                 ut->chunkNativeLimit = chunkLimit;
    2460           0 :                 ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
    2461           0 :                 goto breakout;
    2462             :             }
    2463             :         }
    2464             :         // We scanned through the next batch of UChars without finding the end.
    2465           0 :         U16_SET_CP_START(str, 0, index);
    2466           0 :         if (chunkLimit == INT32_MAX) {
    2467             :             // Scanned to the limit of a 32 bit length.
    2468             :             // Forceably trim the overlength string back so length fits in int32
    2469             :             //  TODO:  add support for 64 bit strings.
    2470           0 :             ut->a = chunkLimit;
    2471           0 :             ut->chunkLength = chunkLimit;
    2472           0 :             ut->nativeIndexingLimit = chunkLimit;
    2473           0 :             if (index > chunkLimit) {
    2474           0 :                 index = chunkLimit;
    2475             :             }
    2476           0 :             ut->chunkNativeLimit = chunkLimit;
    2477           0 :             ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
    2478             :         } else {
    2479             :             // The endpoint of a chunk must not be left in the middle of a surrogate pair.
    2480             :             // If the current end is on a lead surrogate, back the end up by one.
    2481             :             // It doesn't matter if the end char happens to be an unpaired surrogate,
    2482             :             //    and it's simpler not to worry about it.
    2483           0 :             if (U16_IS_LEAD(str[chunkLimit-1])) {
    2484           0 :                 --chunkLimit;
    2485             :             }
    2486             :             // Null-terminated chunk with end still unknown.
    2487             :             // Update the chunk length to reflect what has been scanned thus far.
    2488             :             // That the full length is still unknown is (still) flagged by
    2489             :             //    ut->a being < 0.
    2490           0 :             ut->chunkNativeLimit = chunkLimit;
    2491           0 :             ut->nativeIndexingLimit = chunkLimit;
    2492           0 :             ut->chunkLength = chunkLimit;
    2493             :         }
    2494             : 
    2495             :     }
    2496             : breakout:
    2497           0 :     U_ASSERT(index<=INT32_MAX);
    2498           0 :     ut->chunkOffset = (int32_t)index;
    2499             : 
    2500             :     // Check whether request is at the start or end
    2501           0 :     UBool retVal = (forward && index<ut->chunkNativeLimit) || (!forward && index>0);
    2502           0 :     return retVal;
    2503             : }
    2504             : 
    2505             : 
    2506             : 
    2507             : static int32_t U_CALLCONV
    2508           0 : ucstrTextExtract(UText *ut,
    2509             :                   int64_t start, int64_t limit,
    2510             :                   UChar *dest, int32_t destCapacity,
    2511             :                   UErrorCode *pErrorCode)
    2512             : {
    2513           0 :     if(U_FAILURE(*pErrorCode)) {
    2514           0 :         return 0;
    2515             :     }
    2516           0 :     if(destCapacity<0 || (dest==NULL && destCapacity>0) || start>limit) {
    2517           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    2518           0 :         return 0;
    2519             :     }
    2520             : 
    2521             :     //const UChar *s=(const UChar *)ut->context;
    2522             :     int32_t si, di;
    2523             : 
    2524             :     int32_t start32;
    2525             :     int32_t limit32;
    2526             : 
    2527             :     // Access the start.  Does two things we need:
    2528             :     //   Pins 'start' to the length of the string, if it came in out-of-bounds.
    2529             :     //   Snaps 'start' to the beginning of a code point.
    2530           0 :     ucstrTextAccess(ut, start, TRUE);
    2531           0 :     const UChar *s=ut->chunkContents;
    2532           0 :     start32 = ut->chunkOffset;
    2533             : 
    2534           0 :     int32_t strLength=(int32_t)ut->a;
    2535           0 :     if (strLength >= 0) {
    2536           0 :         limit32 = pinIndex(limit, strLength);
    2537             :     } else {
    2538           0 :         limit32 = pinIndex(limit, INT32_MAX);
    2539             :     }
    2540           0 :     di = 0;
    2541           0 :     for (si=start32; si<limit32; si++) {
    2542           0 :         if (strLength<0 && s[si]==0) {
    2543             :             // Just hit the end of a null-terminated string.
    2544           0 :             ut->a = si;               // set string length for this UText
    2545           0 :             ut->chunkNativeLimit    = si;
    2546           0 :             ut->chunkLength         = si;
    2547           0 :             ut->nativeIndexingLimit = si;
    2548           0 :             strLength               = si;
    2549           0 :             limit32                 = si;
    2550           0 :             break;
    2551             :         }
    2552           0 :         U_ASSERT(di>=0); /* to ensure di never exceeds INT32_MAX, which must not happen logically */
    2553           0 :         if (di<destCapacity) {
    2554             :             // only store if there is space.
    2555           0 :             dest[di] = s[si];
    2556             :         } else {
    2557           0 :             if (strLength>=0) {
    2558             :                 // We have filled the destination buffer, and the string length is known.
    2559             :                 //  Cut the loop short.  There is no need to scan string termination.
    2560           0 :                 di = limit32 - start32;
    2561           0 :                 si = limit32;
    2562           0 :                 break;
    2563             :             }
    2564             :         }
    2565           0 :         di++;
    2566             :     }
    2567             : 
    2568             :     // If the limit index points to a lead surrogate of a pair,
    2569             :     //   add the corresponding trail surrogate to the destination.
    2570           0 :     if (si>0 && U16_IS_LEAD(s[si-1]) &&
    2571           0 :             ((si<strLength || strLength<0)  && U16_IS_TRAIL(s[si])))
    2572             :     {
    2573           0 :         if (di<destCapacity) {
    2574             :             // store only if there is space in the output buffer.
    2575           0 :             dest[di++] = s[si];
    2576             :         }
    2577           0 :         si++;
    2578             :     }
    2579             : 
    2580             :     // Put iteration position at the point just following the extracted text
    2581           0 :     if (si <= ut->chunkNativeLimit) {
    2582           0 :         ut->chunkOffset = si;
    2583             :     } else {
    2584           0 :         ucstrTextAccess(ut, si, TRUE);
    2585             :     }
    2586             : 
    2587             :     // Add a terminating NUL if space in the buffer permits,
    2588             :     // and set the error status as required.
    2589           0 :     u_terminateUChars(dest, destCapacity, di, pErrorCode);
    2590           0 :     return di;
    2591             : }
    2592             : 
    2593             : static const struct UTextFuncs ucstrFuncs =
    2594             : {
    2595             :     sizeof(UTextFuncs),
    2596             :     0, 0, 0,           // Reserved alignment padding
    2597             :     ucstrTextClone,
    2598             :     ucstrTextLength,
    2599             :     ucstrTextAccess,
    2600             :     ucstrTextExtract,
    2601             :     NULL,              // Replace
    2602             :     NULL,              // Copy
    2603             :     NULL,              // MapOffsetToNative,
    2604             :     NULL,              // MapIndexToUTF16,
    2605             :     ucstrTextClose,
    2606             :     NULL,              // spare 1
    2607             :     NULL,              // spare 2
    2608             :     NULL,              // spare 3
    2609             : };
    2610             : 
    2611             : U_CDECL_END
    2612             : 
    2613             : static const UChar gEmptyUString[] = {0};
    2614             : 
    2615             : U_CAPI UText * U_EXPORT2
    2616           0 : utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status) {
    2617           0 :     if (U_FAILURE(*status)) {
    2618           0 :         return NULL;
    2619             :     }
    2620           0 :     if(s==NULL && length==0) {
    2621           0 :         s = gEmptyUString;
    2622             :     }
    2623           0 :     if (s==NULL || length < -1 || length>INT32_MAX) {
    2624           0 :         *status = U_ILLEGAL_ARGUMENT_ERROR;
    2625           0 :         return NULL;
    2626             :     }
    2627           0 :     ut = utext_setup(ut, 0, status);
    2628           0 :     if (U_SUCCESS(*status)) {
    2629           0 :         ut->pFuncs               = &ucstrFuncs;
    2630           0 :         ut->context              = s;
    2631           0 :         ut->providerProperties   = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);
    2632           0 :         if (length==-1) {
    2633           0 :             ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
    2634             :         }
    2635           0 :         ut->a                    = length;
    2636           0 :         ut->chunkContents        = s;
    2637           0 :         ut->chunkNativeStart     = 0;
    2638           0 :         ut->chunkNativeLimit     = length>=0? length : 0;
    2639           0 :         ut->chunkLength          = (int32_t)ut->chunkNativeLimit;
    2640           0 :         ut->chunkOffset          = 0;
    2641           0 :         ut->nativeIndexingLimit  = ut->chunkLength;
    2642             :     }
    2643           0 :     return ut;
    2644             : }
    2645             : 
    2646             : 
    2647             : //------------------------------------------------------------------------------
    2648             : //
    2649             : //     UText implementation for text from ICU CharacterIterators
    2650             : //
    2651             : //         Use of UText data members:
    2652             : //            context    pointer to the CharacterIterator
    2653             : //            a          length of the full text.
    2654             : //            p          pointer to  buffer 1
    2655             : //            b          start index of local buffer 1 contents
    2656             : //            q          pointer to buffer 2
    2657             : //            c          start index of local buffer 2 contents
    2658             : //            r          pointer to the character iterator if the UText owns it.
    2659             : //                       Null otherwise.
    2660             : //
    2661             : //------------------------------------------------------------------------------
    2662             : #define CIBufSize 16
    2663             : 
    2664             : U_CDECL_BEGIN
    2665             : static void U_CALLCONV
    2666           0 : charIterTextClose(UText *ut) {
    2667             :     // Most of the work of close is done by the generic UText framework close.
    2668             :     // All that needs to be done here is delete the CharacterIterator if the UText
    2669             :     //  owns it.  This occurs if the UText was created by cloning.
    2670           0 :     CharacterIterator *ci = (CharacterIterator *)ut->r;
    2671           0 :     delete ci;
    2672           0 :     ut->r = NULL;
    2673           0 : }
    2674             : 
    2675             : static int64_t U_CALLCONV
    2676           0 : charIterTextLength(UText *ut) {
    2677           0 :     return (int32_t)ut->a;
    2678             : }
    2679             : 
    2680             : static UBool U_CALLCONV
    2681           0 : charIterTextAccess(UText *ut, int64_t index, UBool  forward) {
    2682           0 :     CharacterIterator *ci   = (CharacterIterator *)ut->context;
    2683             : 
    2684           0 :     int32_t clippedIndex = (int32_t)index;
    2685           0 :     if (clippedIndex<0) {
    2686           0 :         clippedIndex=0;
    2687           0 :     } else if (clippedIndex>=ut->a) {
    2688           0 :         clippedIndex=(int32_t)ut->a;
    2689             :     }
    2690           0 :     int32_t neededIndex = clippedIndex;
    2691           0 :     if (!forward && neededIndex>0) {
    2692             :         // reverse iteration, want the position just before what was asked for.
    2693           0 :         neededIndex--;
    2694           0 :     } else if (forward && neededIndex==ut->a && neededIndex>0) {
    2695             :         // Forward iteration, don't ask for something past the end of the text.
    2696           0 :         neededIndex--;
    2697             :     }
    2698             : 
    2699             :     // Find the native index of the start of the buffer containing what we want.
    2700           0 :     neededIndex -= neededIndex % CIBufSize;
    2701             : 
    2702           0 :     UChar *buf = NULL;
    2703           0 :     UBool  needChunkSetup = TRUE;
    2704             :     int    i;
    2705           0 :     if (ut->chunkNativeStart == neededIndex) {
    2706             :         // The buffer we want is already the current chunk.
    2707           0 :         needChunkSetup = FALSE;
    2708           0 :     } else if (ut->b == neededIndex) {
    2709             :         // The first buffer (buffer p) has what we need.
    2710           0 :         buf = (UChar *)ut->p;
    2711           0 :     } else if (ut->c == neededIndex) {
    2712             :         // The second buffer (buffer q) has what we need.
    2713           0 :         buf = (UChar *)ut->q;
    2714             :     } else {
    2715             :         // Neither buffer already has what we need.
    2716             :         // Load new data from the character iterator.
    2717             :         // Use the buf that is not the current buffer.
    2718           0 :         buf = (UChar *)ut->p;
    2719           0 :         if (ut->p == ut->chunkContents) {
    2720           0 :             buf = (UChar *)ut->q;
    2721             :         }
    2722           0 :         ci->setIndex(neededIndex);
    2723           0 :         for (i=0; i<CIBufSize; i++) {
    2724           0 :             buf[i] = ci->nextPostInc();
    2725           0 :             if (i+neededIndex > ut->a) {
    2726           0 :                 break;
    2727             :             }
    2728             :         }
    2729             :     }
    2730             : 
    2731             :     // We have a buffer with the data we need.
    2732             :     // Set it up as the current chunk, if it wasn't already.
    2733           0 :     if (needChunkSetup) {
    2734           0 :         ut->chunkContents = buf;
    2735           0 :         ut->chunkLength   = CIBufSize;
    2736           0 :         ut->chunkNativeStart = neededIndex;
    2737           0 :         ut->chunkNativeLimit = neededIndex + CIBufSize;
    2738           0 :         if (ut->chunkNativeLimit > ut->a) {
    2739           0 :             ut->chunkNativeLimit = ut->a;
    2740           0 :             ut->chunkLength  = (int32_t)(ut->chunkNativeLimit)-(int32_t)(ut->chunkNativeStart);
    2741             :         }
    2742           0 :         ut->nativeIndexingLimit = ut->chunkLength;
    2743           0 :         U_ASSERT(ut->chunkOffset>=0 && ut->chunkOffset<=CIBufSize);
    2744             :     }
    2745           0 :     ut->chunkOffset = clippedIndex - (int32_t)ut->chunkNativeStart;
    2746           0 :     UBool success = (forward? ut->chunkOffset<ut->chunkLength : ut->chunkOffset>0);
    2747           0 :     return success;
    2748             : }
    2749             : 
    2750             : static UText * U_CALLCONV
    2751           0 : charIterTextClone(UText *dest, const UText *src, UBool deep, UErrorCode * status) {
    2752           0 :     if (U_FAILURE(*status)) {
    2753           0 :         return NULL;
    2754             :     }
    2755             : 
    2756           0 :     if (deep) {
    2757             :         // There is no CharacterIterator API for cloning the underlying text storage.
    2758           0 :         *status = U_UNSUPPORTED_ERROR;
    2759           0 :         return NULL;
    2760             :     } else {
    2761           0 :         CharacterIterator *srcCI =(CharacterIterator *)src->context;
    2762           0 :         srcCI = srcCI->clone();
    2763           0 :         dest = utext_openCharacterIterator(dest, srcCI, status);
    2764           0 :         if (U_FAILURE(*status)) {
    2765           0 :             return dest;
    2766             :         }
    2767             :         // cast off const on getNativeIndex.
    2768             :         //   For CharacterIterator based UTexts, this is safe, the operation is const.
    2769           0 :         int64_t  ix = utext_getNativeIndex((UText *)src);
    2770           0 :         utext_setNativeIndex(dest, ix);
    2771           0 :         dest->r = srcCI;    // flags that this UText owns the CharacterIterator
    2772             :     }
    2773           0 :     return dest;
    2774             : }
    2775             : 
    2776             : static int32_t U_CALLCONV   // Extract callback for CharacterIterator-backed UTexts: copies the native range [start, limit) into dest as UTF-16 and returns the number of UChars the range needs.
    2777           0 : charIterTextExtract(UText *ut,   // ut: UText whose context is a CharacterIterator and whose 'a' field holds the text length
    2778             :                   int64_t start, int64_t limit,   // start/limit: requested native range; start > limit is rejected, both are passed through pinIndex() below
    2779             :                   UChar *dest, int32_t destCapacity,   // dest/destCapacity: output buffer and its size in UChars; dest may be NULL only when destCapacity is 0
    2780             :                   UErrorCode *status)   // status: in/out error code; if already a failure on entry nothing is done and 0 is returned
    2781             : {
    2782           0 :     if(U_FAILURE(*status)) {
    2783           0 :         return 0;
    2784             :     }
    2785           0 :     if(destCapacity<0 || (dest==NULL && destCapacity>0) || start>limit) {   // negative capacity, NULL buffer with nonzero capacity, or inverted range
    2786           0 :         *status=U_ILLEGAL_ARGUMENT_ERROR;
    2787           0 :         return 0;
    2788             :     }
    2789           0 :     int32_t  length  = (int32_t)ut->a;   // text length, as stored in ut->a when the UText was opened
    2790           0 :     int32_t  start32 = pinIndex(start, length);   // NOTE(review): pinIndex is defined elsewhere; presumably clamps the 64-bit index into [0, length] - confirm
    2791           0 :     int32_t  limit32 = pinIndex(limit, length);
    2792           0 :     int32_t  desti   = 0;   // UChars stored so far; once overflow occurs it keeps counting the UChars that would have been needed
    2793             :     int32_t  srci;   // native index of the next code point to be read
    2794             :     int32_t  copyLimit;   // native index just past the last code point actually stored in dest
    2795             : 
    2796           0 :     CharacterIterator *ci = (CharacterIterator *)ut->context;
    2797           0 :     ci->setIndex32(start32);   // Moves ix to lead of surrogate pair, if needed.
    2798           0 :     srci = ci->getIndex();   // re-read the index, since setIndex32 may have adjusted it
    2799           0 :     copyLimit = srci;
    2800           0 :     while (srci<limit32) {
    2801           0 :         UChar32 c = ci->next32PostInc();   // fetch one code point and advance the iterator
    2802           0 :         int32_t  len = U16_LENGTH(c);   // UTF-16 code units needed to encode c
    2803           0 :         U_ASSERT(desti+len>0); /* to ensure desti+len never exceeds MAX_INT32, which must not happen logically */
    2804           0 :         if (desti+len <= destCapacity) {   // the whole code point still fits
    2805           0 :             U16_APPEND_UNSAFE(dest, desti, c);   // writes c at dest[desti] and advances desti
    2806           0 :             copyLimit = srci+len;
    2807             :         } else {   // does not fit: write nothing more, but keep counting the required length
    2808           0 :             desti += len;
    2809           0 :             *status = U_BUFFER_OVERFLOW_ERROR;
    2810             :         }
    2811           0 :         srci += len;
    2812             :     }
    2813             : 
    2814           0 :     charIterTextAccess(ut, copyLimit, TRUE);   // NOTE(review): presumably re-syncs the UText's chunk/position to copyLimit because the shared iterator was moved above - confirm against charIterTextAccess
    2815             : 
    2816           0 :     u_terminateUChars(dest, destCapacity, desti, status);   // NOTE(review): presumably NUL-terminates when there is room and may update *status otherwise - confirm in ustr_imp.h
    2817           0 :     return desti;   // can exceed destCapacity when U_BUFFER_OVERFLOW_ERROR was set
    2818             : }
    2819             : 
    2820             : static const struct UTextFuncs charIterFuncs =   // Function table installed into ut->pFuncs by utext_openCharacterIterator for CharacterIterator-backed UTexts.
    2821             : {
    2822             :     sizeof(UTextFuncs),  // size of this table
    2823             :     0, 0, 0,             // Reserved alignment padding
    2824             :     charIterTextClone,   // Clone
    2825             :     charIterTextLength,  // Length
    2826             :     charIterTextAccess,  // Access
    2827             :     charIterTextExtract, // Extract
    2828             :     NULL,                // Replace (no function supplied)
    2829             :     NULL,                // Copy (no function supplied)
    2830             :     NULL,                // MapOffsetToNative (no function supplied)
    2831             :     NULL,                // MapIndexToUTF16 (no function supplied)
    2832             :     charIterTextClose,   // Close
    2833             :     NULL,                // spare 1
    2834             :     NULL,                // spare 2
    2835             :     NULL                 // spare 3
    2836             : };
    2837             : U_CDECL_END
    2838             : 
    2839             : 
    2840             : U_CAPI UText * U_EXPORT2   // Opens (or re-initializes) a UText over a CharacterIterator. Returns NULL if *status is already a failure or ci does not start at index 0; otherwise returns the result of utext_setup.
    2841           0 : utext_openCharacterIterator(UText *ut, CharacterIterator *ci, UErrorCode *status) {   // NOTE(review): ci is dereferenced below without a NULL check, so callers must pass a valid iterator
    2842           0 :     if (U_FAILURE(*status)) {
    2843           0 :         return NULL;
    2844             :     }
    2845             : 
    2846           0 :     if (ci->startIndex() > 0) {
    2847             :         // No support for CharacterIterators that do not start indexing from zero.
    2848           0 :         *status = U_UNSUPPORTED_ERROR;
    2849           0 :         return NULL;
    2850             :     }
    2851             : 
    2852             :     // Extra space in UText for 2 buffers of CIBufSize UChars each.
    2853           0 :     int32_t  extraSpace = 2 * CIBufSize * sizeof(UChar);   // size in bytes
    2854           0 :     ut = utext_setup(ut, extraSpace, status);   // NOTE(review): presumably allocates or reuses ut and provides extraSpace bytes at ut->pExtra - confirm against utext_setup
    2855           0 :     if (U_SUCCESS(*status)) {
    2856           0 :         ut->pFuncs                = &charIterFuncs;
    2857           0 :         ut->context              = ci;                    // the iterator pointer is stored as-is; it is not cloned here
    2858           0 :         ut->providerProperties   = 0;                     // no provider property flags set
    2859           0 :         ut->a                    = ci->endIndex();        // Length of text
    2860           0 :         ut->p                    = ut->pExtra;            // First buffer
    2861           0 :         ut->b                    = -1;                    // Native index of first buffer contents
    2862           0 :         ut->q                    = (UChar*)ut->pExtra+CIBufSize;  // Second buffer, CIBufSize UChars after the first
    2863           0 :         ut->c                    = -1;                    // Native index of second buffer contents
    2864             : 
    2865             :         // Initialize current chunk contents to be empty.
    2866             :         //   First access will fault something in.
    2867             :         //   Note:  The initial nativeStart and chunkOffset must sum to zero
    2868             :         //          so that getNativeIndex() will correctly compute to zero
    2869             :         //          if no call to Access() has ever been made.  They can't be both
    2870             :         //          zero without Access() thinking that the chunk is valid.
    2871           0 :         ut->chunkContents        = (UChar *)ut->p;
    2872           0 :         ut->chunkNativeStart     = -1;                    // -1 here plus chunkOffset 1 below sums to zero, per the note above
    2873           0 :         ut->chunkOffset          = 1;
    2874           0 :         ut->chunkNativeLimit     = 0;
    2875           0 :         ut->chunkLength          = 0;                     // empty chunk
    2876           0 :         ut->nativeIndexingLimit  = ut->chunkOffset;  // enables native indexing
    2877             :     }
    2878           0 :     return ut;   // on a utext_setup failure this is whatever utext_setup returned
    2879             : }

Generated by: LCOV version 1.13