LCOV - code coverage report
Current view: top level - intl/icu/source/common - filterednormalizer2.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 136 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 18 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : *
       6             : *   Copyright (C) 2009-2012, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : *******************************************************************************
      10             : *   file name:  filterednormalizer2.cpp
      11             : *   encoding:   UTF-8
      12             : *   tab size:   8 (not used)
      13             : *   indentation:4
      14             : *
      15             : *   created on: 2009dec10
      16             : *   created by: Markus W. Scherer
      17             : */
      18             : 
      19             : #include "unicode/utypes.h"
      20             : 
      21             : #if !UCONFIG_NO_NORMALIZATION
      22             : 
      23             : #include "unicode/normalizer2.h"
      24             : #include "unicode/uniset.h"
      25             : #include "unicode/unistr.h"
      26             : #include "unicode/unorm.h"
      27             : #include "cpputils.h"
      28             : 
      29             : U_NAMESPACE_BEGIN
      30             : 
      31           0 : FilteredNormalizer2::~FilteredNormalizer2() {}
      32             : 
      33             : UnicodeString &
      34           0 : FilteredNormalizer2::normalize(const UnicodeString &src,
      35             :                                UnicodeString &dest,
      36             :                                UErrorCode &errorCode) const {
      37           0 :     uprv_checkCanGetBuffer(src, errorCode);
      38           0 :     if(U_FAILURE(errorCode)) {
      39           0 :         dest.setToBogus();
      40           0 :         return dest;
      41             :     }
      42           0 :     if(&dest==&src) {
      43           0 :         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
      44           0 :         return dest;
      45             :     }
      46           0 :     dest.remove();
      47           0 :     return normalize(src, dest, USET_SPAN_SIMPLE, errorCode);
      48             : }
      49             : 
      50             : // Internal: No argument checking, and appends to dest.
      51             : // Pass as input spanCondition the one that is likely to yield a non-zero
      52             : // span length at the start of src.
      53             : // For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,
      54             : // USET_SPAN_SIMPLE should be passed in for the start of src
      55             : // and USET_SPAN_NOT_CONTAINED should be passed in if we continue after
      56             : // an in-filter prefix.
      57             : UnicodeString &
      58           0 : FilteredNormalizer2::normalize(const UnicodeString &src,
      59             :                                UnicodeString &dest,
      60             :                                USetSpanCondition spanCondition,
      61             :                                UErrorCode &errorCode) const {
      62           0 :     UnicodeString tempDest;  // Don't throw away destination buffer between iterations.
      63           0 :     for(int32_t prevSpanLimit=0; prevSpanLimit<src.length();) {
      64           0 :         int32_t spanLimit=set.span(src, prevSpanLimit, spanCondition);
      65           0 :         int32_t spanLength=spanLimit-prevSpanLimit;
      66           0 :         if(spanCondition==USET_SPAN_NOT_CONTAINED) {
      67           0 :             if(spanLength!=0) {
      68           0 :                 dest.append(src, prevSpanLimit, spanLength);
      69             :             }
      70           0 :             spanCondition=USET_SPAN_SIMPLE;
      71             :         } else {
      72           0 :             if(spanLength!=0) {
      73             :                 // Not norm2.normalizeSecondAndAppend() because we do not want
      74             :                 // to modify the non-filter part of dest.
      75           0 :                 dest.append(norm2.normalize(src.tempSubStringBetween(prevSpanLimit, spanLimit),
      76           0 :                                             tempDest, errorCode));
      77           0 :                 if(U_FAILURE(errorCode)) {
      78           0 :                     break;
      79             :                 }
      80             :             }
      81           0 :             spanCondition=USET_SPAN_NOT_CONTAINED;
      82             :         }
      83           0 :         prevSpanLimit=spanLimit;
      84             :     }
      85           0 :     return dest;
      86             : }
      87             : 
      88             : UnicodeString &
      89           0 : FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
      90             :                                               const UnicodeString &second,
      91             :                                               UErrorCode &errorCode) const {
      92           0 :     return normalizeSecondAndAppend(first, second, TRUE, errorCode);
      93             : }
      94             : 
      95             : UnicodeString &
      96           0 : FilteredNormalizer2::append(UnicodeString &first,
      97             :                             const UnicodeString &second,
      98             :                             UErrorCode &errorCode) const {
      99           0 :     return normalizeSecondAndAppend(first, second, FALSE, errorCode);
     100             : }
     101             : 
     102             : UnicodeString &
     103           0 : FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
     104             :                                               const UnicodeString &second,
     105             :                                               UBool doNormalize,
     106             :                                               UErrorCode &errorCode) const {
     107           0 :     uprv_checkCanGetBuffer(first, errorCode);
     108           0 :     uprv_checkCanGetBuffer(second, errorCode);
     109           0 :     if(U_FAILURE(errorCode)) {
     110           0 :         return first;
     111             :     }
     112           0 :     if(&first==&second) {
     113           0 :         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     114           0 :         return first;
     115             :     }
     116           0 :     if(first.isEmpty()) {
     117           0 :         if(doNormalize) {
     118           0 :             return normalize(second, first, errorCode);
     119             :         } else {
     120           0 :             return first=second;
     121             :         }
     122             :     }
     123             :     // merge the in-filter suffix of the first string with the in-filter prefix of the second
     124           0 :     int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);
     125           0 :     if(prefixLimit!=0) {
     126           0 :         UnicodeString prefix(second.tempSubString(0, prefixLimit));
     127           0 :         int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);
     128           0 :         if(suffixStart==0) {
     129           0 :             if(doNormalize) {
     130           0 :                 norm2.normalizeSecondAndAppend(first, prefix, errorCode);
     131             :             } else {
     132           0 :                 norm2.append(first, prefix, errorCode);
     133             :             }
     134             :         } else {
     135           0 :             UnicodeString middle(first, suffixStart, INT32_MAX);
     136           0 :             if(doNormalize) {
     137           0 :                 norm2.normalizeSecondAndAppend(middle, prefix, errorCode);
     138             :             } else {
     139           0 :                 norm2.append(middle, prefix, errorCode);
     140             :             }
     141           0 :             first.replace(suffixStart, INT32_MAX, middle);
     142             :         }
     143             :     }
     144           0 :     if(prefixLimit<second.length()) {
     145           0 :         UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));
     146           0 :         if(doNormalize) {
     147           0 :             normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);
     148             :         } else {
     149           0 :             first.append(rest);
     150             :         }
     151             :     }
     152           0 :     return first;
     153             : }
     154             : 
     155             : UBool
     156           0 : FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const {
     157           0 :     return set.contains(c) && norm2.getDecomposition(c, decomposition);
     158             : }
     159             : 
     160             : UBool
     161           0 : FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
     162           0 :     return set.contains(c) && norm2.getRawDecomposition(c, decomposition);
     163             : }
     164             : 
     165             : UChar32
     166           0 : FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const {
     167           0 :     return (set.contains(a) && set.contains(b)) ? norm2.composePair(a, b) : U_SENTINEL;
     168             : }
     169             : 
     170             : uint8_t
     171           0 : FilteredNormalizer2::getCombiningClass(UChar32 c) const {
     172           0 :     return set.contains(c) ? norm2.getCombiningClass(c) : 0;
     173             : }
     174             : 
     175             : UBool
     176           0 : FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
     177           0 :     uprv_checkCanGetBuffer(s, errorCode);
     178           0 :     if(U_FAILURE(errorCode)) {
     179           0 :         return FALSE;
     180             :     }
     181           0 :     USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
     182           0 :     for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
     183           0 :         int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
     184           0 :         if(spanCondition==USET_SPAN_NOT_CONTAINED) {
     185           0 :             spanCondition=USET_SPAN_SIMPLE;
     186             :         } else {
     187           0 :             if( !norm2.isNormalized(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode) ||
     188           0 :                 U_FAILURE(errorCode)
     189             :             ) {
     190           0 :                 return FALSE;
     191             :             }
     192           0 :             spanCondition=USET_SPAN_NOT_CONTAINED;
     193             :         }
     194           0 :         prevSpanLimit=spanLimit;
     195             :     }
     196           0 :     return TRUE;
     197             : }
     198             : 
     199             : UNormalizationCheckResult
     200           0 : FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
     201           0 :     uprv_checkCanGetBuffer(s, errorCode);
     202           0 :     if(U_FAILURE(errorCode)) {
     203           0 :         return UNORM_MAYBE;
     204             :     }
     205           0 :     UNormalizationCheckResult result=UNORM_YES;
     206           0 :     USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
     207           0 :     for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
     208           0 :         int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
     209           0 :         if(spanCondition==USET_SPAN_NOT_CONTAINED) {
     210           0 :             spanCondition=USET_SPAN_SIMPLE;
     211             :         } else {
     212             :             UNormalizationCheckResult qcResult=
     213           0 :                 norm2.quickCheck(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
     214           0 :             if(U_FAILURE(errorCode) || qcResult==UNORM_NO) {
     215           0 :                 return qcResult;
     216           0 :             } else if(qcResult==UNORM_MAYBE) {
     217           0 :                 result=qcResult;
     218             :             }
     219           0 :             spanCondition=USET_SPAN_NOT_CONTAINED;
     220             :         }
     221           0 :         prevSpanLimit=spanLimit;
     222             :     }
     223           0 :     return result;
     224             : }
     225             : 
     226             : int32_t
     227           0 : FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
     228           0 :     uprv_checkCanGetBuffer(s, errorCode);
     229           0 :     if(U_FAILURE(errorCode)) {
     230           0 :         return 0;
     231             :     }
     232           0 :     USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
     233           0 :     for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
     234           0 :         int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
     235           0 :         if(spanCondition==USET_SPAN_NOT_CONTAINED) {
     236           0 :             spanCondition=USET_SPAN_SIMPLE;
     237             :         } else {
     238             :             int32_t yesLimit=
     239             :                 prevSpanLimit+
     240           0 :                 norm2.spanQuickCheckYes(
     241           0 :                     s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
     242           0 :             if(U_FAILURE(errorCode) || yesLimit<spanLimit) {
     243           0 :                 return yesLimit;
     244             :             }
     245           0 :             spanCondition=USET_SPAN_NOT_CONTAINED;
     246             :         }
     247           0 :         prevSpanLimit=spanLimit;
     248             :     }
     249           0 :     return s.length();
     250             : }
     251             : 
     252             : UBool
     253           0 : FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
     254           0 :     return !set.contains(c) || norm2.hasBoundaryBefore(c);
     255             : }
     256             : 
     257             : UBool
     258           0 : FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
     259           0 :     return !set.contains(c) || norm2.hasBoundaryAfter(c);
     260             : }
     261             : 
     262             : UBool
     263           0 : FilteredNormalizer2::isInert(UChar32 c) const {
     264           0 :     return !set.contains(c) || norm2.isInert(c);
     265             : }
     266             : 
     267             : U_NAMESPACE_END
     268             : 
     269             : // C API ------------------------------------------------------------------- ***
     270             : 
     271             : U_NAMESPACE_USE
     272             : 
     273             : U_CAPI UNormalizer2 * U_EXPORT2
     274           0 : unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
     275           0 :     if(U_FAILURE(*pErrorCode)) {
     276           0 :         return NULL;
     277             :     }
     278           0 :     if(filterSet==NULL) {
     279           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     280           0 :         return NULL;
     281             :     }
     282             :     Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
     283           0 :                                              *UnicodeSet::fromUSet(filterSet));
     284           0 :     if(fn2==NULL) {
     285           0 :         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
     286             :     }
     287           0 :     return (UNormalizer2 *)fn2;
     288             : }
     289             : 
     290             : #endif  // !UCONFIG_NO_NORMALIZATION

Generated by: LCOV version 1.13