LCOV - code coverage report
Current view: top level - intl/icu/source/i18n - ucoleitr.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 211 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 32 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : ******************************************************************************
       5             : *   Copyright (C) 2001-2016, International Business Machines
       6             : *   Corporation and others.  All Rights Reserved.
       7             : ******************************************************************************
       8             : *
       9             : * File ucoleitr.cpp
      10             : *
      11             : * Modification History:
      12             : *
      13             : * Date        Name        Description
      14             : * 02/15/2001  synwee      Modified all methods to process its own function 
      15             : *                         instead of calling the equivalent c++ api (coleitr.h)
      16             : * 2012-2014   markus      Rewritten in C++ again.
      17             : ******************************************************************************/
      18             : 
      19             : #include "unicode/utypes.h"
      20             : 
      21             : #if !UCONFIG_NO_COLLATION
      22             : 
      23             : #include "unicode/coleitr.h"
      24             : #include "unicode/tblcoll.h"
      25             : #include "unicode/ucoleitr.h"
      26             : #include "unicode/ustring.h"
      27             : #include "unicode/sortkey.h"
      28             : #include "unicode/uobject.h"
      29             : #include "cmemory.h"
      30             : #include "usrchimp.h"
      31             : 
      32             : U_NAMESPACE_USE
      33             : 
      34             : #define BUFFER_LENGTH             100
      35             : 
      36             : #define DEFAULT_BUFFER_SIZE 16
      37             : #define BUFFER_GROW 8
      38             : 
      39             : #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (size_t)(count) * sizeof (src)[0])
      40             : 
      41             : #define NEW_ARRAY(type, count) (type *) uprv_malloc((size_t)(count) * sizeof(type))
      42             : 
      43             : #define DELETE_ARRAY(array) uprv_free((void *) (array))
      44             : 
      45             : struct RCEI
      46             : {
      47             :     uint32_t ce;
      48             :     int32_t  low;
      49             :     int32_t  high;
      50             : };
      51             : 
      52             : U_NAMESPACE_BEGIN
      53             : 
      54             : struct RCEBuffer
      55             : {
      56             :     RCEI    defaultBuffer[DEFAULT_BUFFER_SIZE];
      57             :     RCEI   *buffer;
      58             :     int32_t bufferIndex;
      59             :     int32_t bufferSize;
      60             : 
      61             :     RCEBuffer();
      62             :     ~RCEBuffer();
      63             : 
      64             :     UBool isEmpty() const;
      65             :     void  put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode);
      66             :     const RCEI *get();
      67             : };
      68             : 
      69           0 : RCEBuffer::RCEBuffer()
      70             : {
      71           0 :     buffer = defaultBuffer;
      72           0 :     bufferIndex = 0;
      73           0 :     bufferSize = UPRV_LENGTHOF(defaultBuffer);
      74           0 : }
      75             : 
      76           0 : RCEBuffer::~RCEBuffer()
      77             : {
      78           0 :     if (buffer != defaultBuffer) {
      79           0 :         DELETE_ARRAY(buffer);
      80             :     }
      81           0 : }
      82             : 
      83           0 : UBool RCEBuffer::isEmpty() const
      84             : {
      85           0 :     return bufferIndex <= 0;
      86             : }
      87             : 
      88           0 : void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode)
      89             : {
      90           0 :     if (U_FAILURE(errorCode)) {
      91           0 :         return;
      92             :     }
      93           0 :     if (bufferIndex >= bufferSize) {
      94           0 :         RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW);
      95           0 :         if (newBuffer == NULL) {
      96           0 :             errorCode = U_MEMORY_ALLOCATION_ERROR;
      97           0 :             return;
      98             :         }
      99             : 
     100           0 :         ARRAY_COPY(newBuffer, buffer, bufferSize);
     101             : 
     102           0 :         if (buffer != defaultBuffer) {
     103           0 :             DELETE_ARRAY(buffer);
     104             :         }
     105             : 
     106           0 :         buffer = newBuffer;
     107           0 :         bufferSize += BUFFER_GROW;
     108             :     }
     109             : 
     110           0 :     buffer[bufferIndex].ce   = ce;
     111           0 :     buffer[bufferIndex].low  = ixLow;
     112           0 :     buffer[bufferIndex].high = ixHigh;
     113             : 
     114           0 :     bufferIndex += 1;
     115             : }
     116             : 
     117           0 : const RCEI *RCEBuffer::get()
     118             : {
     119           0 :     if (bufferIndex > 0) {
     120           0 :      return &buffer[--bufferIndex];
     121             :     }
     122             : 
     123           0 :     return NULL;
     124             : }
     125             : 
     126           0 : PCEBuffer::PCEBuffer()
     127             : {
     128           0 :     buffer = defaultBuffer;
     129           0 :     bufferIndex = 0;
     130           0 :     bufferSize = UPRV_LENGTHOF(defaultBuffer);
     131           0 : }
     132             : 
     133           0 : PCEBuffer::~PCEBuffer()
     134             : {
     135           0 :     if (buffer != defaultBuffer) {
     136           0 :         DELETE_ARRAY(buffer);
     137             :     }
     138           0 : }
     139             : 
     140           0 : void PCEBuffer::reset()
     141             : {
     142           0 :     bufferIndex = 0;
     143           0 : }
     144             : 
     145           0 : UBool PCEBuffer::isEmpty() const
     146             : {
     147           0 :     return bufferIndex <= 0;
     148             : }
     149             : 
     150           0 : void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode)
     151             : {
     152           0 :     if (U_FAILURE(errorCode)) {
     153           0 :         return;
     154             :     }
     155           0 :     if (bufferIndex >= bufferSize) {
     156           0 :         PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW);
     157           0 :         if (newBuffer == NULL) {
     158           0 :             errorCode = U_MEMORY_ALLOCATION_ERROR;
     159           0 :             return;
     160             :         }
     161             : 
     162           0 :         ARRAY_COPY(newBuffer, buffer, bufferSize);
     163             : 
     164           0 :         if (buffer != defaultBuffer) {
     165           0 :             DELETE_ARRAY(buffer);
     166             :         }
     167             : 
     168           0 :         buffer = newBuffer;
     169           0 :         bufferSize += BUFFER_GROW;
     170             :     }
     171             : 
     172           0 :     buffer[bufferIndex].ce   = ce;
     173           0 :     buffer[bufferIndex].low  = ixLow;
     174           0 :     buffer[bufferIndex].high = ixHigh;
     175             : 
     176           0 :     bufferIndex += 1;
     177             : }
     178             : 
     179           0 : const PCEI *PCEBuffer::get()
     180             : {
     181           0 :     if (bufferIndex > 0) {
     182           0 :      return &buffer[--bufferIndex];
     183             :     }
     184             : 
     185           0 :     return NULL;
     186             : }
     187             : 
     188           0 : UCollationPCE::UCollationPCE(UCollationElements *elems) { init(elems); }
     189             : 
     190           0 : UCollationPCE::UCollationPCE(CollationElementIterator *iter) { init(iter); }
     191             : 
     192           0 : void UCollationPCE::init(UCollationElements *elems) {
     193           0 :     init(CollationElementIterator::fromUCollationElements(elems));
     194           0 : }
     195             : 
     196           0 : void UCollationPCE::init(CollationElementIterator *iter)
     197             : {
     198           0 :     cei = iter;
     199           0 :     init(*iter->rbc_);
     200           0 : }
     201             : 
     202           0 : void UCollationPCE::init(const Collator &coll)
     203             : {
     204           0 :     UErrorCode status = U_ZERO_ERROR;
     205             : 
     206           0 :     strength    = coll.getAttribute(UCOL_STRENGTH, status);
     207           0 :     toShift     = coll.getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED;
     208           0 :     isShifted   = FALSE;
     209           0 :     variableTop = coll.getVariableTop(status);
     210           0 : }
     211             : 
     212           0 : UCollationPCE::~UCollationPCE()
     213             : {
     214             :     // nothing to do
     215           0 : }
     216             : 
     217           0 : uint64_t UCollationPCE::processCE(uint32_t ce)
     218             : {
     219           0 :     uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
     220             : 
     221             :     // This is clean, but somewhat slow...
     222             :     // We could apply the mask to ce and then
     223             :     // just get all three orders...
     224           0 :     switch(strength) {
     225             :     default:
     226           0 :         tertiary = ucol_tertiaryOrder(ce);
     227             :         U_FALLTHROUGH;
     228             : 
     229             :     case UCOL_SECONDARY:
     230           0 :         secondary = ucol_secondaryOrder(ce);
     231             :         U_FALLTHROUGH;
     232             : 
     233             :     case UCOL_PRIMARY:
     234           0 :         primary = ucol_primaryOrder(ce);
     235             :     }
     236             : 
     237             :     // **** This should probably handle continuations too.  ****
     238             :     // **** That means that we need 24 bits for the primary ****
     239             :     // **** instead of the 16 that we're currently using.   ****
     240             :     // **** So we can lay out the 64 bits as: 24.12.12.16.  ****
     241             :     // **** Another complication with continuations is that ****
     242             :     // **** the *second* CE is marked as a continuation, so ****
     243             :     // **** we always have to peek ahead to know how long   ****
     244             :     // **** the primary is...                               ****
     245           0 :     if ((toShift && variableTop > ce && primary != 0)
     246           0 :                 || (isShifted && primary == 0)) {
     247             : 
     248           0 :         if (primary == 0) {
     249           0 :             return UCOL_IGNORABLE;
     250             :         }
     251             : 
     252           0 :         if (strength >= UCOL_QUATERNARY) {
     253           0 :             quaternary = primary;
     254             :         }
     255             : 
     256           0 :         primary = secondary = tertiary = 0;
     257           0 :         isShifted = TRUE;
     258             :     } else {
     259           0 :         if (strength >= UCOL_QUATERNARY) {
     260           0 :             quaternary = 0xFFFF;
     261             :         }
     262             : 
     263           0 :         isShifted = FALSE;
     264             :     }
     265             : 
     266           0 :     return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
     267             : }
     268             : 
     269             : U_NAMESPACE_END
     270             : 
     271             : /* public methods ---------------------------------------------------- */
     272             : 
     273             : U_CAPI UCollationElements* U_EXPORT2
     274           0 : ucol_openElements(const UCollator  *coll,
     275             :                   const UChar      *text,
     276             :                         int32_t    textLength,
     277             :                         UErrorCode *status)
     278             : {
     279           0 :     if (U_FAILURE(*status)) {
     280           0 :         return NULL;
     281             :     }
     282           0 :     if (coll == NULL || (text == NULL && textLength != 0)) {
     283           0 :         *status = U_ILLEGAL_ARGUMENT_ERROR;
     284           0 :         return NULL;
     285             :     }
     286           0 :     const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
     287           0 :     if (rbc == NULL) {
     288           0 :         *status = U_UNSUPPORTED_ERROR;  // coll is a Collator but not a RuleBasedCollator
     289           0 :         return NULL;
     290             :     }
     291             : 
     292           0 :     UnicodeString s((UBool)(textLength < 0), text, textLength);
     293           0 :     CollationElementIterator *cei = rbc->createCollationElementIterator(s);
     294           0 :     if (cei == NULL) {
     295           0 :         *status = U_MEMORY_ALLOCATION_ERROR;
     296           0 :         return NULL;
     297             :     }
     298             : 
     299           0 :     return cei->toUCollationElements();
     300             : }
     301             : 
     302             : 
     303             : U_CAPI void U_EXPORT2
     304           0 : ucol_closeElements(UCollationElements *elems)
     305             : {
     306           0 :     delete CollationElementIterator::fromUCollationElements(elems);
     307           0 : }
     308             : 
     309             : U_CAPI void U_EXPORT2
     310           0 : ucol_reset(UCollationElements *elems)
     311             : {
     312           0 :     CollationElementIterator::fromUCollationElements(elems)->reset();
     313           0 : }
     314             : 
     315             : U_CAPI int32_t U_EXPORT2
     316           0 : ucol_next(UCollationElements *elems, 
     317             :           UErrorCode         *status)
     318             : {
     319           0 :     if (U_FAILURE(*status)) {
     320           0 :         return UCOL_NULLORDER;
     321             :     }
     322             : 
     323           0 :     return CollationElementIterator::fromUCollationElements(elems)->next(*status);
     324             : }
     325             : 
     326             : U_NAMESPACE_BEGIN
     327             : 
     328             : int64_t
     329           0 : UCollationPCE::nextProcessed(
     330             :                    int32_t            *ixLow,
     331             :                    int32_t            *ixHigh,
     332             :                    UErrorCode         *status)
     333             : {
     334           0 :     int64_t result = UCOL_IGNORABLE;
     335           0 :     uint32_t low = 0, high = 0;
     336             : 
     337           0 :     if (U_FAILURE(*status)) {
     338           0 :         return UCOL_PROCESSED_NULLORDER;
     339             :     }
     340             : 
     341           0 :     pceBuffer.reset();
     342             : 
     343           0 :     do {
     344           0 :         low = cei->getOffset();
     345           0 :         int32_t ce = cei->next(*status);
     346           0 :         high = cei->getOffset();
     347             : 
     348           0 :         if (ce == UCOL_NULLORDER) {
     349           0 :              result = UCOL_PROCESSED_NULLORDER;
     350           0 :              break;
     351             :         }
     352             : 
     353           0 :         result = processCE((uint32_t)ce);
     354           0 :     } while (result == UCOL_IGNORABLE);
     355             : 
     356           0 :     if (ixLow != NULL) {
     357           0 :         *ixLow = low;
     358             :     }
     359             : 
     360           0 :     if (ixHigh != NULL) {
     361           0 :         *ixHigh = high;
     362             :     }
     363             : 
     364           0 :     return result;
     365             : }
     366             : 
     367             : U_NAMESPACE_END
     368             : 
     369             : U_CAPI int32_t U_EXPORT2
     370           0 : ucol_previous(UCollationElements *elems,
     371             :               UErrorCode         *status)
     372             : {
     373           0 :     if(U_FAILURE(*status)) {
     374           0 :         return UCOL_NULLORDER;
     375             :     }
     376           0 :     return CollationElementIterator::fromUCollationElements(elems)->previous(*status);
     377             : }
     378             : 
     379             : U_NAMESPACE_BEGIN
     380             : 
     381             : int64_t
     382           0 : UCollationPCE::previousProcessed(
     383             :                    int32_t            *ixLow,
     384             :                    int32_t            *ixHigh,
     385             :                    UErrorCode         *status)
     386             : {
     387           0 :     int64_t result = UCOL_IGNORABLE;
     388           0 :     int32_t  low = 0, high = 0;
     389             : 
     390           0 :     if (U_FAILURE(*status)) {
     391           0 :         return UCOL_PROCESSED_NULLORDER;
     392             :     }
     393             : 
     394             :     // pceBuffer.reset();
     395             : 
     396           0 :     while (pceBuffer.isEmpty()) {
     397             :         // buffer raw CEs up to non-ignorable primary
     398           0 :         RCEBuffer rceb;
     399             :         int32_t ce;
     400             :         
     401             :         // **** do we need to reset rceb, or will it always be empty at this point ****
     402           0 :         do {
     403           0 :             high = cei->getOffset();
     404           0 :             ce   = cei->previous(*status);
     405           0 :             low  = cei->getOffset();
     406             : 
     407           0 :             if (ce == UCOL_NULLORDER) {
     408           0 :                 if (!rceb.isEmpty()) {
     409           0 :                     break;
     410             :                 }
     411             : 
     412           0 :                 goto finish;
     413             :             }
     414             : 
     415           0 :             rceb.put((uint32_t)ce, low, high, *status);
     416           0 :         } while (U_SUCCESS(*status) && ((ce & UCOL_PRIMARYORDERMASK) == 0 || isContinuation(ce)));
     417             : 
     418             :         // process the raw CEs
     419           0 :         while (U_SUCCESS(*status) && !rceb.isEmpty()) {
     420           0 :             const RCEI *rcei = rceb.get();
     421             : 
     422           0 :             result = processCE(rcei->ce);
     423             : 
     424           0 :             if (result != UCOL_IGNORABLE) {
     425           0 :                 pceBuffer.put(result, rcei->low, rcei->high, *status);
     426             :             }
     427             :         }
     428           0 :         if (U_FAILURE(*status)) {
     429           0 :             return UCOL_PROCESSED_NULLORDER;
     430             :         }
     431             :     }
     432             : 
     433             : finish:
     434           0 :     if (pceBuffer.isEmpty()) {
     435             :         // **** Is -1 the right value for ixLow, ixHigh? ****
     436           0 :         if (ixLow != NULL) {
     437           0 :                 *ixLow = -1;
     438             :         }
     439             :         
     440           0 :         if (ixHigh != NULL) {
     441           0 :                 *ixHigh = -1
     442             :                 ;
     443             :         }
     444           0 :         return UCOL_PROCESSED_NULLORDER;
     445             :     }
     446             : 
     447           0 :     const PCEI *pcei = pceBuffer.get();
     448             : 
     449           0 :     if (ixLow != NULL) {
     450           0 :         *ixLow = pcei->low;
     451             :     }
     452             : 
     453           0 :     if (ixHigh != NULL) {
     454           0 :         *ixHigh = pcei->high;
     455             :     }
     456             : 
     457           0 :     return pcei->ce;
     458             : }
     459             : 
     460             : U_NAMESPACE_END
     461             : 
     462             : U_CAPI int32_t U_EXPORT2
     463           0 : ucol_getMaxExpansion(const UCollationElements *elems,
     464             :                            int32_t            order)
     465             : {
     466           0 :     return CollationElementIterator::fromUCollationElements(elems)->getMaxExpansion(order);
     467             : 
     468             :     // TODO: The old code masked the order according to strength and then did a binary search.
     469             :     // However this was probably at least partially broken because of the following comment.
     470             :     // Still, it might have found a match when this version may not.
     471             : 
     472             :     // FIXME: with a masked search, there might be more than one hit,
     473             :     // so we need to look forward and backward from the match to find all
     474             :     // of the hits...
     475             : }
     476             : 
     477             : U_CAPI void U_EXPORT2
     478           0 : ucol_setText(      UCollationElements *elems,
     479             :              const UChar              *text,
     480             :                    int32_t            textLength,
     481             :                    UErrorCode         *status)
     482             : {
     483           0 :     if (U_FAILURE(*status)) {
     484           0 :         return;
     485             :     }
     486             : 
     487           0 :     if ((text == NULL && textLength != 0)) {
     488           0 :         *status = U_ILLEGAL_ARGUMENT_ERROR;
     489           0 :         return;
     490             :     }
     491           0 :     UnicodeString s((UBool)(textLength < 0), text, textLength);
     492           0 :     return CollationElementIterator::fromUCollationElements(elems)->setText(s, *status);
     493             : }
     494             : 
     495             : U_CAPI int32_t U_EXPORT2
     496           0 : ucol_getOffset(const UCollationElements *elems)
     497             : {
     498           0 :     return CollationElementIterator::fromUCollationElements(elems)->getOffset();
     499             : }
     500             : 
     501             : U_CAPI void U_EXPORT2
     502           0 : ucol_setOffset(UCollationElements    *elems,
     503             :                int32_t           offset,
     504             :                UErrorCode            *status)
     505             : {
     506           0 :     if (U_FAILURE(*status)) {
     507           0 :         return;
     508             :     }
     509             : 
     510           0 :     CollationElementIterator::fromUCollationElements(elems)->setOffset(offset, *status);
     511             : }
     512             : 
     513             : U_CAPI int32_t U_EXPORT2
     514           0 : ucol_primaryOrder (int32_t order) 
     515             : {
     516           0 :     return (order >> 16) & 0xffff;
     517             : }
     518             : 
     519             : U_CAPI int32_t U_EXPORT2
     520           0 : ucol_secondaryOrder (int32_t order) 
     521             : {
     522           0 :     return (order >> 8) & 0xff;
     523             : }
     524             : 
     525             : U_CAPI int32_t U_EXPORT2
     526           0 : ucol_tertiaryOrder (int32_t order) 
     527             : {
     528           0 :     return order & 0xff;
     529             : }
     530             : 
     531             : #endif /* #if !UCONFIG_NO_COLLATION */

Generated by: LCOV version 1.13