LCOV - code coverage report
Current view: top level - intl/icu/source/i18n - uspoof.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 352 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 40 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : ***************************************************************************
       5             : * Copyright (C) 2008-2015, International Business Machines Corporation
       6             : * and others. All Rights Reserved.
       7             : ***************************************************************************
       8             : *   file name:  uspoof.cpp
       9             : *   encoding:   UTF-8
      10             : *   tab size:   8 (not used)
      11             : *   indentation:4
      12             : *
      13             : *   created on: 2008Feb13
      14             : *   created by: Andy Heninger
      15             : *
      16             : *   Unicode Spoof Detection
      17             : */
      18             : #include "unicode/utypes.h"
      19             : #include "unicode/normalizer2.h"
      20             : #include "unicode/uspoof.h"
      21             : #include "unicode/ustring.h"
      22             : #include "unicode/utf16.h"
      23             : #include "cmemory.h"
      24             : #include "cstring.h"
      25             : #include "mutex.h"
      26             : #include "scriptset.h"
      27             : #include "uassert.h"
      28             : #include "ucln_in.h"
      29             : #include "uspoof_impl.h"
      30             : #include "umutex.h"
      31             : 
      32             : 
      33             : #if !UCONFIG_NO_NORMALIZATION
      34             : 
      35             : U_NAMESPACE_USE
      36             : 
      37             : 
      38             : //
      39             : // Static objects used by the spoof impl, their thread-safe initialization and their cleanup.
                       //   gInclusionSet, gRecommendedSet and gNfdNormalizer are assigned in initializeStatics()
                       //   and reset in uspoof_cleanup(). gSpoofInitStaticsOnce is the once-flag handed to
                       //   umtx_initOnce() by the functions in this file that trigger that initialization.
      40             : //
      41             : static UnicodeSet *gInclusionSet = NULL;
      42             : static UnicodeSet *gRecommendedSet = NULL;
      43             : static const Normalizer2 *gNfdNormalizer = NULL;
      44             : static UInitOnce gSpoofInitStaticsOnce = U_INITONCE_INITIALIZER;
      45             : 
                       // Cleanup callback for the file-level statics; registered with
                       // ucln_i18n_registerCleanup() at the end of initializeStatics().
                       // Deletes the two UnicodeSets, clears all three pointers and resets the
                       // init-once flag. Always returns TRUE.
      46             : static UBool U_CALLCONV
      47           0 : uspoof_cleanup(void) {
      48           0 :     delete gInclusionSet;
      49           0 :     gInclusionSet = NULL;
      50           0 :     delete gRecommendedSet;
      51           0 :     gRecommendedSet = NULL;
                           // The normalizer pointer is only cleared, never deleted here; presumably the
                           // instance from Normalizer2::getNFDInstance() is not owned by this file (confirm).
      52           0 :     gNfdNormalizer = NULL;
      53           0 :     gSpoofInitStaticsOnce.reset();
      54           0 :     return TRUE;
      55             : }
      56             : 
                       // One-time initializer for the file-level statics; run through umtx_initOnce()
                       // with gSpoofInitStaticsOnce. Builds and freezes gInclusionSet and gRecommendedSet
                       // from the pattern strings below, fetches the NFD normalizer, and registers
                       // uspoof_cleanup() as the cleanup function for UCLN_I18N_SPOOF.
                       //
                       // @param status  Passed to the UnicodeSet constructors and to getNFDInstance().
                       //
                       // NOTE(review): both `new UnicodeSet(...)` results are dereferenced by freeze()
                       // without a NULL check, and `status` is never inspected inside this function, so
                       // the statics are assigned and the cleanup is registered even if a step failed.
      57           0 : static void U_CALLCONV initializeStatics(UErrorCode &status) {
      58             :     static const char *inclusionPat = 
      59             :         "['\\-.\\:\\u00B7\\u0375\\u058A\\u05F3\\u05F4\\u06FD\\u06FE\\u0F0B\\u200C\\u200D\\u2010\\u"
      60             :         "2019\\u2027\\u30A0\\u30FB]";
      61           0 :     gInclusionSet = new UnicodeSet(UnicodeString(inclusionPat, -1, US_INV), status);
      62           0 :     gInclusionSet->freeze();
      63             : 
      64             :     // Note: data from http://unicode.org/Public/security/9.0.0/IdentifierStatus.txt
      65             :     // There is tooling to generate this constant in the unicodetools project:
      66             :     //      org.unicode.text.tools.RecommendedSetGenerator
      67             :     // It will print the Java and C++ code to the console for easy copy-paste into this file.
      68             :     // Note: concatenated string constants do not work with UNICODE_STRING_SIMPLE on all platforms.
      69             :     static const char *recommendedPat = 
      70             :         "[0-9A-Z_a-z\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u0131\\u0134-\\u013E\\u0141-\\u014"
      71             :         "8\\u014A-\\u017E\\u018F\\u01A0\\u01A1\\u01AF\\u01B0\\u01CD-\\u01DC\\u01DE-\\u01E3\\u01E"
      72             :         "6-\\u01F0\\u01F4\\u01F5\\u01F8-\\u021B\\u021E\\u021F\\u0226-\\u0233\\u0259\\u02BB\\u02B"
      73             :         "C\\u02EC\\u0300-\\u0304\\u0306-\\u030C\\u030F-\\u0311\\u0313\\u0314\\u031B\\u0323-\\u03"
      74             :         "28\\u032D\\u032E\\u0330\\u0331\\u0335\\u0338\\u0339\\u0342\\u0345\\u037B-\\u037D\\u0386"
      75             :         "\\u0388-\\u038A\\u038C\\u038E-\\u03A1\\u03A3-\\u03CE\\u03FC-\\u045F\\u048A-\\u0529\\u05"
      76             :         "2E\\u052F\\u0531-\\u0556\\u0559\\u0561-\\u0586\\u05B4\\u05D0-\\u05EA\\u05F0-\\u05F2\\u0"
      77             :         "620-\\u063F\\u0641-\\u0655\\u0660-\\u0669\\u0670-\\u0672\\u0674\\u0679-\\u068D\\u068F-"
      78             :         "\\u06D3\\u06D5\\u06E5\\u06E6\\u06EE-\\u06FC\\u06FF\\u0750-\\u07B1\\u08A0-\\u08AC\\u08B2"
      79             :         "\\u08B6-\\u08BD\\u0901-\\u094D\\u094F\\u0950\\u0956\\u0957\\u0960-\\u0963\\u0966-\\u096"
      80             :         "F\\u0971-\\u0977\\u0979-\\u097F\\u0981-\\u0983\\u0985-\\u098C\\u098F\\u0990\\u0993-\\u0"
      81             :         "9A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9\\u09BC-\\u09C4\\u09C7\\u09C8\\u09CB-\\u09CE\\u"
      82             :         "09D7\\u09E0-\\u09E3\\u09E6-\\u09F1\\u0A01-\\u0A03\\u0A05-\\u0A0A\\u0A0F\\u0A10\\u0A13-"
      83             :         "\\u0A28\\u0A2A-\\u0A30\\u0A32\\u0A35\\u0A38\\u0A39\\u0A3C\\u0A3E-\\u0A42\\u0A47\\u0A48\\"
      84             :         "u0A4B-\\u0A4D\\u0A5C\\u0A66-\\u0A74\\u0A81-\\u0A83\\u0A85-\\u0A8D\\u0A8F-\\u0A91\\u0A9"
      85             :         "3-\\u0AA8\\u0AAA-\\u0AB0\\u0AB2\\u0AB3\\u0AB5-\\u0AB9\\u0ABC-\\u0AC5\\u0AC7-\\u0AC9\\u0"
      86             :         "ACB-\\u0ACD\\u0AD0\\u0AE0-\\u0AE3\\u0AE6-\\u0AEF\\u0B01-\\u0B03\\u0B05-\\u0B0C\\u0B0F\\"
      87             :         "u0B10\\u0B13-\\u0B28\\u0B2A-\\u0B30\\u0B32\\u0B33\\u0B35-\\u0B39\\u0B3C-\\u0B43\\u0B47"
      88             :         "\\u0B48\\u0B4B-\\u0B4D\\u0B56\\u0B57\\u0B5F-\\u0B61\\u0B66-\\u0B6F\\u0B71\\u0B82\\u0B83"
      89             :         "\\u0B85-\\u0B8A\\u0B8E-\\u0B90\\u0B92-\\u0B95\\u0B99\\u0B9A\\u0B9C\\u0B9E\\u0B9F\\u0BA3"
      90             :         "\\u0BA4\\u0BA8-\\u0BAA\\u0BAE-\\u0BB9\\u0BBE-\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCD\\u0B"
      91             :         "D0\\u0BD7\\u0BE6-\\u0BEF\\u0C01-\\u0C03\\u0C05-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28\\u"
      92             :         "0C2A-\\u0C33\\u0C35-\\u0C39\\u0C3D-\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56"
      93             :         "\\u0C60\\u0C61\\u0C66-\\u0C6F\\u0C80\\u0C82\\u0C83\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92"
      94             :         "-\\u0CA8\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBC-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD\\u0"
      95             :         "CD5\\u0CD6\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1\\u0CF2\\u0D02\\u0D03\\u0D05-\\u0D0C\\u0"
      96             :         "D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-\\u0D48\\u0D4A-\\u0D4E\\u0D54-\\u0D57"
      97             :         "\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-\\u0D7F\\u0D82\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D9"
      98             :         "6\\u0D9A-\\u0DA5\\u0DA7-\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0"
      99             :         "DD4\\u0DD6\\u0DD8-\\u0DDE\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-\\"
     100             :         "u0E59\\u0E81\\u0E82\\u0E84\\u0E87\\u0E88\\u0E8A\\u0E8D\\u0E94-\\u0E97\\u0E99-\\u0E9F\\u"
     101             :         "0EA1-\\u0EA3\\u0EA5\\u0EA7\\u0EAA\\u0EAB\\u0EAD-\\u0EB2\\u0EB4-\\u0EB9\\u0EBB-\\u0EBD\\"
     102             :         "u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9\\u0EDE\\u0EDF\\u0F00\\u0F20-\\u0F29"
     103             :         "\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-\\u0F47\\u0F49-\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F"
     104             :         "56\\u0F58-\\u0F5B\\u0F5D-\\u0F68\\u0F6A-\\u0F6C\\u0F71\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0"
     105             :         "F82-\\u0F84\\u0F86-\\u0F92\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6"
     106             :         "\\u0FA8-\\u0FAB\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-\\u109D\\u10"
     107             :         "C7\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-\\u1248\\u124A-\\u124D\\u"
     108             :         "1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D\\u1290-\\u12B0\\u12B2"
     109             :         "-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1"
     110             :         "315\\u1318-\\u135A\\u135D-\\u135F\\u1380-\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7\\u17A9-"
     111             :         "\\u17B3\\u17B6-\\u17CA\\u17D2\\u17D7\\u17DC\\u17E0-\\u17E9\\u1C80-\\u1C88\\u1E00-\\u1E9"
     112             :         "9\\u1E9E\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1"
     113             :         "F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70\\u1F72\\u1F74\\u1F76\\u1F78\\u1F7A\\u1F"
     114             :         "7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA\\u1FBC\\u1FC2-\\u1FC4\\u1FC6-\\u1FC8\\u1FCA\\u1FCC\\u1"
     115             :         "FD0-\\u1FD2\\u1FD6-\\u1FDA\\u1FE0-\\u1FE2\\u1FE4-\\u1FEA\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-"
     116             :         "\\u1FF8\\u1FFA\\u1FFC\\u2D27\\u2D2D\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0"
     117             :         "-\\u2DB6\\u2DB8-\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE\\u3"
     118             :         "005-\\u3007\\u3041-\\u3096\\u3099\\u309A\\u309D\\u309E\\u30A1-\\u30FA\\u30FC-\\u30FE\\u"
     119             :         "3105-\\u312D\\u31A0-\\u31BA\\u3400-\\u4DB5\\u4E00-\\u9FD5\\uA660\\uA661\\uA674-\\uA67B"
     120             :         "\\uA67F\\uA69F\\uA717-\\uA71F\\uA788\\uA78D\\uA78E\\uA790-\\uA793\\uA7A0-\\uA7AA\\uA7AE"
     121             :         "\\uA7FA\\uA9E7-\\uA9FE\\uAA60-\\uAA76\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB"
     122             :         "11-\\uAB16\\uAB20-\\uAB26\\uAB28-\\uAB2E\\uAC00-\\uD7A3\\uFA0E\\uFA0F\\uFA11\\uFA13\\uF"
     123             :         "A14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00020000-\\U0002A6D6\\U0002A700-\\U0"
     124             :         "002B734\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1]";
     125             : 
     126           0 :     gRecommendedSet = new UnicodeSet(UnicodeString(recommendedPat, -1, US_INV), status);
     127           0 :     gRecommendedSet->freeze();
     128           0 :     gNfdNormalizer = Normalizer2::getNFDInstance(status);
     129           0 :     ucln_i18n_registerCleanup(UCLN_I18N_SPOOF, uspoof_cleanup);
     130           0 : }
     131             : 
                       // Runs initializeStatics() through umtx_initOnce() on gSpoofInitStaticsOnce,
                       // so that code outside this file can trigger the same one-time setup.
                       // @param status  Dereferenced unconditionally and passed on to umtx_initOnce().
     132           0 : U_CFUNC void uspoof_internalInitStatics(UErrorCode *status) {
     133           0 :     umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
     134           0 : }
     135             : 
                       // Creates a new spoof checker backed by a freshly constructed SpoofImpl.
                       // Runs the one-time static initialization first and returns NULL if *status
                       // is a failure afterwards.
                       //
                       // @param status  In/out error code; set to U_MEMORY_ALLOCATION_ERROR when `new`
                       //                yields NULL while the status is still a success.
                       // @return        The new checker as an opaque USpoofChecker pointer.
                       //
                       // NOTE(review): on the failure path `si` is deleted and set to NULL, yet the final
                       // statement still calls si->asUSpoofChecker() through that NULL pointer. Returning
                       // NULL directly from the failure branch would avoid the member call on NULL.
     136             : U_CAPI USpoofChecker * U_EXPORT2
     137           0 : uspoof_open(UErrorCode *status) {
     138           0 :     umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
     139           0 :     if (U_FAILURE(*status)) {
     140           0 :         return NULL;
     141             :     }
     142           0 :     SpoofImpl *si = new SpoofImpl(*status);
     143           0 :     if (U_SUCCESS(*status) && si == NULL) {
     144           0 :         *status = U_MEMORY_ALLOCATION_ERROR;
     145             :     }
     146           0 :     if (U_FAILURE(*status)) {
     147           0 :         delete si;
     148           0 :         si = NULL;
     149             :     }
     150           0 :     return si->asUSpoofChecker();
     151             : }
     152             : 
     153             : 
                       // Creates a spoof checker from a serialized data image.
                       //
                       // @param data           Start of the serialized data; handed to the SpoofData constructor.
                       // @param length         Length value handed to the SpoofData constructor with `data`.
                       // @param pActualLength  Optional out-parameter; when non-NULL it receives sd->size().
                       // @param status         In/out error code. Returns NULL immediately if it is already a
                       //                       failure; set to U_MEMORY_ALLOCATION_ERROR if either `new` gave NULL.
                       // @return               The new checker, or NULL on any failure.
                       //
                       // NOTE(review): *status is not re-checked after umtx_initOnce(), and `sd` is passed to
                       // the SpoofImpl constructor before it is tested for NULL.
                       // NOTE(review): both failure branches delete `sd` and then `si`. Whether SpoofImpl
                       // releases the SpoofData it was constructed with is not visible here; if it does,
                       // this is a double release. Confirm against uspoof_impl.
     154             : U_CAPI USpoofChecker * U_EXPORT2
     155           0 : uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
     156             :                           UErrorCode *status) {
     157           0 :     if (U_FAILURE(*status)) {
     158           0 :         return NULL;
     159             :     }
     160           0 :     umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
     161           0 :     SpoofData *sd = new SpoofData(data, length, *status);
     162           0 :     SpoofImpl *si = new SpoofImpl(sd, *status);
     163           0 :     if (U_FAILURE(*status)) {
     164           0 :         delete sd;
     165           0 :         delete si;
     166           0 :         return NULL;
     167             :     }
     168           0 :     if (sd == NULL || si == NULL) {
     169           0 :         *status = U_MEMORY_ALLOCATION_ERROR;
     170           0 :         delete sd;
     171           0 :         delete si;
     172           0 :         return NULL;
     173             :     }
     174             :         
     175           0 :     if (pActualLength != NULL) {
     176           0 :         *pActualLength = sd->size();
     177             :     }
     178           0 :     return si->asUSpoofChecker();
     179             : }
     180             : 
     181             : 
                       // Creates a copy of an existing spoof checker via the SpoofImpl copy constructor.
                       //
                       // @param sc      Checker to copy; NULL is returned when validateThis() rejects it.
                       // @param status  In/out error code, passed to validateThis() and the copy constructor.
                       // @return        The copy as an opaque USpoofChecker pointer.
                       //
                       // NOTE(review): after a failed copy `result` is set to NULL and
                       // result->asUSpoofChecker() is then called through that NULL pointer. Unlike
                       // uspoof_open(), a NULL from `new` with a successful status is not turned into
                       // U_MEMORY_ALLOCATION_ERROR here.
     182             : U_CAPI USpoofChecker * U_EXPORT2
     183           0 : uspoof_clone(const USpoofChecker *sc, UErrorCode *status) {
     184           0 :     const SpoofImpl *src = SpoofImpl::validateThis(sc, *status);
     185           0 :     if (src == NULL) {
     186           0 :         return NULL;
     187             :     }
     188           0 :     SpoofImpl *result = new SpoofImpl(*src, *status);   // copy constructor
     189           0 :     if (U_FAILURE(*status)) {
     190           0 :         delete result;
     191           0 :         result = NULL;
     192             :     }
     193           0 :     return result->asUSpoofChecker();
     194             : }
     195             : 
     196             : 
                       // Destroys a spoof checker.
                       // The handle is run through validateThis() and whatever pointer that returns is
                       // deleted; a NULL result makes the delete a no-op. The local status is discarded
                       // because this function has no way to report an error.
     197             : U_CAPI void U_EXPORT2
     198           0 : uspoof_close(USpoofChecker *sc) {
     199           0 :     UErrorCode status = U_ZERO_ERROR;
     200           0 :     SpoofImpl *This = SpoofImpl::validateThis(sc, status);
     201           0 :     delete This;
     202           0 : }
     203             : 
     204             : 
                       // Replaces the set of enabled checks on a spoof checker.
                       //
                       // @param sc      Checker to modify; nothing happens if validateThis() returns NULL.
                       // @param checks  Bit mask of checks; any bit outside USPOOF_ALL_CHECKS | USPOOF_AUX_INFO
                       //                is rejected with U_ILLEGAL_ARGUMENT_ERROR and fChecks is left unchanged.
                       // @param status  In/out error code.
     205             : U_CAPI void U_EXPORT2
     206           0 : uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status) {
     207           0 :     SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
     208           0 :     if (This == NULL) {
     209           0 :         return;
     210             :     }
     211             : 
     212             :     // Verify that the requested checks are all ones (bits) that 
     213             :     //   are acceptable, known values.
     214           0 :     if (checks & ~(USPOOF_ALL_CHECKS | USPOOF_AUX_INFO)) {
     215           0 :         *status = U_ILLEGAL_ARGUMENT_ERROR; 
     216           0 :         return;
     217             :     }
     218             : 
     219           0 :     This->fChecks = checks;
     220             : }
     221             : 
     222             : 
                       // Returns the bit mask of enabled checks (fChecks) of a spoof checker,
                       // or 0 when validateThis() returns NULL for `sc`.
     223             : U_CAPI int32_t U_EXPORT2
     224           0 : uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status) {
     225           0 :     const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
     226           0 :     if (This == NULL) {
     227           0 :         return 0;
     228             :     }
     229           0 :     return This->fChecks;
     230             : }
     231             : 
                       // Stores the restriction level on the checker and also switches on the
                       // USPOOF_RESTRICTION_LEVEL bit in fChecks.
                       // This function has no status parameter: if validateThis() rejects `sc`,
                       // the call silently does nothing.
     232             : U_CAPI void U_EXPORT2
     233           0 : uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel) {
     234           0 :     UErrorCode status = U_ZERO_ERROR;
     235           0 :     SpoofImpl *This = SpoofImpl::validateThis(sc, status);
     236           0 :     if (This != NULL) {
     237           0 :         This->fRestrictionLevel = restrictionLevel;
     238           0 :         This->fChecks |= USPOOF_RESTRICTION_LEVEL;
     239             :     }
     240           0 : }
     241             : 
                       // Returns the restriction level stored on the checker (fRestrictionLevel).
                       // There is no status parameter; USPOOF_UNRESTRICTIVE is returned when
                       // validateThis() rejects `sc`.
     242             : U_CAPI URestrictionLevel U_EXPORT2
     243           0 : uspoof_getRestrictionLevel(const USpoofChecker *sc) {
     244           0 :     UErrorCode status = U_ZERO_ERROR;
     245           0 :     const SpoofImpl *This = SpoofImpl::validateThis(sc, status);
     246           0 :     if (This == NULL) {
     247           0 :         return USPOOF_UNRESTRICTIVE;
     248             :     }
     249           0 :     return This->fRestrictionLevel;
     250             : }
     251             : 
                       // C API wrapper: validates `sc` and forwards `localesList` to
                       // SpoofImpl::setAllowedLocales(). Does nothing when validation returns NULL.
     252             : U_CAPI void U_EXPORT2
     253           0 : uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status) {
     254           0 :     SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
     255           0 :     if (This == NULL) {
     256           0 :         return;
     257             :     }
     258           0 :     This->setAllowedLocales(localesList, *status);
     259             : }
     260             : 
                       // C API wrapper: validates `sc` and returns the result of
                       // SpoofImpl::getAllowedLocales(), or NULL when validation returns NULL.
     261             : U_CAPI const char * U_EXPORT2
     262           0 : uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status) {
     263           0 :     SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
     264           0 :     if (This == NULL) {
     265           0 :         return NULL;
     266             :     }
     267           0 :     return This->getAllowedLocales(*status);
     268             : }
     269             : 
     270             : 
                       // Returns the checker's allowed-characters set as a C USet pointer by
                       // converting the result of uspoof_getAllowedUnicodeSet().
                       //
                       // NOTE(review): uspoof_getAllowedUnicodeSet() returns NULL when `sc` fails
                       // validation, and toUSet() is then called on that NULL pointer without a check.
     271             : U_CAPI const USet * U_EXPORT2
     272           0 : uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status) {
     273           0 :     const UnicodeSet *result = uspoof_getAllowedUnicodeSet(sc, status);
     274           0 :     return result->toUSet();
     275             : }
     276             : 
                       // Returns the checker's fAllowedCharsSet member pointer directly (no copy is made),
                       // or NULL when validateThis() returns NULL for `sc`.
     277             : U_CAPI const UnicodeSet * U_EXPORT2
     278           0 : uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status) {
     279           0 :     const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
     280           0 :     if (This == NULL) {
     281           0 :         return NULL;
     282             :     }
     283           0 :     return This->fAllowedCharsSet;
     284             : }
     285             : 
     286             : 
                       // C (USet) flavor of uspoof_setAllowedUnicodeSet(): converts `chars` with
                       // UnicodeSet::fromUSet() and forwards the call unchanged.
     287             : U_CAPI void U_EXPORT2
     288           0 : uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status) {
     289           0 :     const UnicodeSet *set = UnicodeSet::fromUSet(chars);
     290           0 :     uspoof_setAllowedUnicodeSet(sc, set, status);
     291           0 : }
     292             : 
     293             : 
                       // Replaces the checker's allowed-characters set with a frozen clone of `chars`
                       // and switches on the USPOOF_CHAR_LIMIT bit in fChecks.
                       //
                       // @param sc      Checker to modify; nothing happens if validateThis() returns NULL.
                       // @param chars   Set to copy; a bogus set is rejected with U_ILLEGAL_ARGUMENT_ERROR.
                       // @param status  In/out error code; U_MEMORY_ALLOCATION_ERROR if the clone is NULL or bogus.
                       //
                       // NOTE(review): `chars` is dereferenced without a NULL check.
                       // NOTE(review): when the clone is non-NULL but bogus, the function returns without
                       // deleting `clonedSet`, so that clone is leaked.
     294             : U_CAPI void U_EXPORT2
     295           0 : uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) {
     296           0 :     SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
     297           0 :     if (This == NULL) {
     298           0 :         return;
     299             :     }
     300           0 :     if (chars->isBogus()) {
     301           0 :         *status = U_ILLEGAL_ARGUMENT_ERROR;
     302           0 :         return;
     303             :     }
     304           0 :     UnicodeSet *clonedSet = static_cast<UnicodeSet *>(chars->clone());
     305           0 :     if (clonedSet == NULL || clonedSet->isBogus()) {
     306           0 :         *status = U_MEMORY_ALLOCATION_ERROR;
     307           0 :         return;
     308             :     }
     309           0 :     clonedSet->freeze();
                           // The previous set is released only after the replacement is ready.
     310           0 :     delete This->fAllowedCharsSet;
     311           0 :     This->fAllowedCharsSet = clonedSet;
     312           0 :     This->fChecks |= USPOOF_CHAR_LIMIT;
     313             : }
     314             : 
     315             : 
                       // Older UTF-16 entry point, kept for backwards compatibility.
                       //
                       // @param position  Optional out-parameter; when non-NULL it is always set to 0.
                       // @return          The result of uspoof_check2() called with a NULL checkResult.
     316             : U_CAPI int32_t U_EXPORT2
     317           0 : uspoof_check(const USpoofChecker *sc,
     318             :              const UChar *id, int32_t length,
     319             :              int32_t *position,
     320             :              UErrorCode *status) {
     321             : 
     322             :     // Backwards compatibility:
     323           0 :     if (position != NULL) {
     324           0 :         *position = 0;
     325             :     }
     326             : 
     327             :     // Delegate to uspoof_check2
     328           0 :     return uspoof_check2(sc, id, length, NULL, status);
     329             : }
     330             : 
     331             : 
                       // UTF-16 buffer flavor of the check: wraps `id` in a UnicodeString and
                       // delegates to uspoof_check2UnicodeString().
                       //
                       // @param sc           Checker to use; 0 is returned if validateThis() returns NULL.
                       // @param id           UTF-16 text to check.
                       // @param length       Length of `id`, or -1 (passed as the first constructor argument,
                       //                     i.e. the "length == -1" flag). Values below -1 give
                       //                     U_ILLEGAL_ARGUMENT_ERROR and a return of 0.
                       // @param checkResult  Forwarded unchanged to uspoof_check2UnicodeString().
                       // @param status       In/out error code.
     332             : U_CAPI int32_t U_EXPORT2
     333           0 : uspoof_check2(const USpoofChecker *sc,
     334             :     const UChar* id, int32_t length,
     335             :     USpoofCheckResult* checkResult,
     336             :     UErrorCode *status) {
     337             : 
     338           0 :     const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
     339           0 :     if (This == NULL) {
     340           0 :         return 0;
     341             :     }
     342           0 :     if (length < -1) {
     343           0 :         *status = U_ILLEGAL_ARGUMENT_ERROR;
     344           0 :         return 0;
     345             :     }
     346           0 :     UnicodeString idStr((length == -1), id, length);  // Aliasing constructor.
     347           0 :     int32_t result = uspoof_check2UnicodeString(sc, idStr, checkResult, status);
     348           0 :     return result;
     349             : }
     350             : 
     351             : 
                       // Older UTF-8 entry point, kept for backwards compatibility.
                       //
                       // @param position  Optional out-parameter; when non-NULL it is always set to 0.
                       // @return          The result of uspoof_check2UTF8() called with a NULL checkResult.
     352             : U_CAPI int32_t U_EXPORT2
     353           0 : uspoof_checkUTF8(const USpoofChecker *sc,
     354             :                  const char *id, int32_t length,
     355             :                  int32_t *position,
     356             :                  UErrorCode *status) {
     357             : 
     358             :     // Backwards compatibility:
     359           0 :     if (position != NULL) {
     360           0 :         *position = 0;
     361             :     }
     362             : 
     363             :     // Delegate to uspoof_check2
     364           0 :     return uspoof_check2UTF8(sc, id, length, NULL, status);
     365             : }
     366             : 
     367             : 
                       // UTF-8 flavor of the check: converts `id` with UnicodeString::fromUTF8() and
                       // delegates to uspoof_check2UnicodeString(). Returns 0 right away if *status is
                       // already a failure.
                       //
                       // @param length  Byte length of `id`; any negative value makes the code use
                       //                uprv_strlen(id) instead.
                       //
                       // NOTE(review): unlike uspoof_check2(), a length below -1 is not rejected with
                       // U_ILLEGAL_ARGUMENT_ERROR here; it is handled exactly like -1.
     368             : U_CAPI int32_t U_EXPORT2
     369           0 : uspoof_check2UTF8(const USpoofChecker *sc,
     370             :     const char *id, int32_t length,
     371             :     USpoofCheckResult* checkResult,
     372             :     UErrorCode *status) {
     373             : 
     374           0 :     if (U_FAILURE(*status)) {
     375           0 :         return 0;
     376             :     }
     377           0 :     UnicodeString idStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : uprv_strlen(id)));
     378           0 :     int32_t result = uspoof_check2UnicodeString(sc, idStr, checkResult, status);
     379           0 :     return result;
     380             : }
     381             : 
     382             : 
                       // UTF-16 buffer flavor of the confusability test: wraps both identifiers in
                       // UnicodeStrings and delegates to uspoof_areConfusableUnicodeString().
                       //
                       // @param length1, length2  Lengths of id1/id2, or -1; a value below -1 for either one
                       //                          gives U_ILLEGAL_ARGUMENT_ERROR and a return of 0.
                       // @param status            In/out error code; 0 is returned if it is a failure after
                       //                          validateThis() has run.
     383             : U_CAPI int32_t U_EXPORT2
     384           0 : uspoof_areConfusable(const USpoofChecker *sc,
     385             :                      const UChar *id1, int32_t length1,
     386             :                      const UChar *id2, int32_t length2,
     387             :                      UErrorCode *status) {
                           // Called for its effect on *status only; the returned pointer is not used here.
     388           0 :     SpoofImpl::validateThis(sc, *status);
     389           0 :     if (U_FAILURE(*status)) {
     390           0 :         return 0;
     391             :     }
     392           0 :     if (length1 < -1 || length2 < -1) {
     393           0 :         *status = U_ILLEGAL_ARGUMENT_ERROR;
     394           0 :         return 0;
     395             :     }
     396             :         
     397           0 :     UnicodeString id1Str((length1==-1), id1, length1);  // Aliasing constructor
     398           0 :     UnicodeString id2Str((length2==-1), id2, length2);  // Aliasing constructor
     399           0 :     return uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status);
     400             : }
     401             : 
     402             : 
                       // UTF-8 flavor of the confusability test: converts both identifiers with
                       // UnicodeString::fromUTF8() and delegates to uspoof_areConfusableUnicodeString().
                       //
                       // @param length1, length2  Byte lengths of id1/id2; -1 makes the code use uprv_strlen().
                       //                          A value below -1 for either one gives
                       //                          U_ILLEGAL_ARGUMENT_ERROR and a return of 0.
                       // @param status            In/out error code; 0 is returned if it is a failure after
                       //                          validateThis() has run.
     403             : U_CAPI int32_t U_EXPORT2
     404           0 : uspoof_areConfusableUTF8(const USpoofChecker *sc,
     405             :                          const char *id1, int32_t length1,
     406             :                          const char *id2, int32_t length2,
     407             :                          UErrorCode *status) {
                           // Called for its effect on *status only; the returned pointer is not used here.
     408           0 :     SpoofImpl::validateThis(sc, *status);
     409           0 :     if (U_FAILURE(*status)) {
     410           0 :         return 0;
     411             :     }
     412           0 :     if (length1 < -1 || length2 < -1) {
     413           0 :         *status = U_ILLEGAL_ARGUMENT_ERROR;
     414           0 :         return 0;
     415             :     }
     416           0 :     UnicodeString id1Str = UnicodeString::fromUTF8(StringPiece(id1, length1>=0? length1 : uprv_strlen(id1)));
     417           0 :     UnicodeString id2Str = UnicodeString::fromUTF8(StringPiece(id2, length2>=0? length2 : uprv_strlen(id2)));
     418           0 :     int32_t results = uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status);
     419           0 :     return results;
     420             : }
     421             :  
     422             : 
                       // Tests whether two identifiers are confusable and reports which classes of
                       // confusability apply.
                       //
                       // The two strings count as confusable when their skeletons (computed by
                       // uspoof_getSkeletonUnicodeString()) are equal. The class flags are then derived
                       // from the resolved script sets of the two inputs and masked with the checker's
                       // enabled checks.
                       //
                       // @param sc      Checker to use.
                       // @param id1     First identifier.
                       // @param id2     Second identifier.
                       // @param status  In/out error code. Set to U_INVALID_STATE_ERROR when no bit of
                       //                USPOOF_CONFUSABLE is enabled in the checker's fChecks.
                       // @return        0 on error or when the skeletons differ; otherwise a combination of
                       //                USPOOF_SINGLE_SCRIPT_CONFUSABLE, USPOOF_MIXED_SCRIPT_CONFUSABLE and
                       //                USPOOF_WHOLE_SCRIPT_CONFUSABLE, limited to the bits enabled in fChecks.
                       //
                       // NOTE(review): *status is not checked after the two getResolvedScriptSet() calls;
                       // the script sets are used to compute the result regardless.
     423             : U_CAPI int32_t U_EXPORT2
     424           0 : uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
     425             :                                   const icu::UnicodeString &id1,
     426             :                                   const icu::UnicodeString &id2,
     427             :                                   UErrorCode *status) {
     428           0 :     const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
     429           0 :     if (U_FAILURE(*status)) {
     430           0 :         return 0;
     431             :     }
     432             :     //
     433             :     // See section 4 of UAX 39 for the algorithm for checking whether two strings are confusable,
     434             :     //   and for definitions of the types (single, whole, mixed-script) of confusables.
     435             :     
     436             :     // We only care about a few of the check flags.  Ignore the others.
     437             :     // If no tests relevant to this function have been specified, return an error.
     438             :     // TODO:  is this really the right thing to do?  It's probably an error on the caller's part,
     439             :     //        but logically we would just return 0 (no error).
     440           0 :     if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
     441           0 :         *status = U_INVALID_STATE_ERROR;
     442           0 :         return 0;
     443             :     }
     444             : 
     445             :     // Compute the skeletons and check for confusability.
     446           0 :     UnicodeString id1Skeleton;
     447           0 :     uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id1, id1Skeleton, status);
     448           0 :     UnicodeString id2Skeleton;
     449           0 :     uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id2, id2Skeleton, status);
     450           0 :     if (U_FAILURE(*status)) { return 0; }
     451           0 :     if (id1Skeleton != id2Skeleton) {
     452           0 :         return 0;
     453             :     }
     454             : 
     455             :     // If we get here, the strings are confusable.  Now we just need to set the flags for the appropriate classes
     456             :     // of confusables according to UTS 39 section 4.
     457             :     // Start by computing the resolved script sets of id1 and id2.
     458           0 :     ScriptSet id1RSS;
     459           0 :     This->getResolvedScriptSet(id1, id1RSS, *status);
     460           0 :     ScriptSet id2RSS;
     461           0 :     This->getResolvedScriptSet(id2, id2RSS, *status);
     462             : 
     463             :     // Turn on all applicable flags
                           //   - script sets intersect:        single-script confusable
                           //   - script sets do not intersect: mixed-script confusable, and additionally
                           //     whole-script confusable when neither resolved script set is empty.
     464           0 :     int32_t result = 0;
     465           0 :     if (id1RSS.intersects(id2RSS)) {
     466           0 :         result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
     467             :     } else {
     468           0 :         result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
     469           0 :         if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
     470           0 :             result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
     471             :         }
     472             :     }
     473             : 
     474             :     // Turn off flags that the user doesn't want
     475           0 :     if ((This->fChecks & USPOOF_SINGLE_SCRIPT_CONFUSABLE) == 0) {
     476           0 :         result &= ~USPOOF_SINGLE_SCRIPT_CONFUSABLE;
     477             :     }
     478           0 :     if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) == 0) {
     479           0 :         result &= ~USPOOF_MIXED_SCRIPT_CONFUSABLE;
     480             :     }
     481           0 :     if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) == 0) {
     482           0 :         result &= ~USPOOF_WHOLE_SCRIPT_CONFUSABLE;
     483             :     }
     484             : 
     485           0 :     return result;
     486             : }
     487             : 
     488             : 
                       // Older UnicodeString entry point, kept for backwards compatibility.
                       //
                       // @param position  Optional out-parameter; when non-NULL it is always set to 0.
                       // @return          The result of uspoof_check2UnicodeString() called with a NULL checkResult.
     489             : U_CAPI int32_t U_EXPORT2
     490           0 : uspoof_checkUnicodeString(const USpoofChecker *sc,
     491             :                           const icu::UnicodeString &id,
     492             :                           int32_t *position,
     493             :                           UErrorCode *status) {
     494             : 
     495             :     // Backwards compatibility:
     496           0 :     if (position != NULL) {
     497           0 :         *position = 0;
     498             :     }
     499             : 
     500             :     // Delegate to uspoof_check2
     501           0 :     return uspoof_check2UnicodeString(sc, id, NULL, status);
     502             : }
     503             : 
     504           0 : int32_t checkImpl(const SpoofImpl* This, const UnicodeString& id, CheckResult* checkResult, UErrorCode* status) {
     505           0 :     U_ASSERT(This != NULL);
     506           0 :     U_ASSERT(checkResult != NULL);
     507           0 :     checkResult->clear();
     508           0 :     int32_t result = 0;
     509             : 
     510           0 :     if (0 != (This->fChecks & USPOOF_RESTRICTION_LEVEL)) {
     511           0 :         URestrictionLevel idRestrictionLevel = This->getRestrictionLevel(id, *status);
     512           0 :         if (idRestrictionLevel > This->fRestrictionLevel) {
     513           0 :             result |= USPOOF_RESTRICTION_LEVEL;
     514             :         }
     515           0 :         checkResult->fRestrictionLevel = idRestrictionLevel;
     516             :     }
     517             : 
     518           0 :     if (0 != (This->fChecks & USPOOF_MIXED_NUMBERS)) {
     519           0 :         UnicodeSet numerics;
     520           0 :         This->getNumerics(id, numerics, *status);
     521           0 :         if (numerics.size() > 1) {
     522           0 :             result |= USPOOF_MIXED_NUMBERS;
     523             :         }
     524           0 :         checkResult->fNumerics = numerics;  // UnicodeSet::operator=
     525             :     }
     526             : 
     527             : 
     528           0 :     if (0 != (This->fChecks & USPOOF_CHAR_LIMIT)) {
     529             :         int32_t i;
     530             :         UChar32 c;
     531           0 :         int32_t length = id.length();
     532           0 :         for (i=0; i<length ;) {
     533           0 :             c = id.char32At(i);
     534           0 :             i += U16_LENGTH(c);
     535           0 :             if (!This->fAllowedCharsSet->contains(c)) {
     536           0 :                 result |= USPOOF_CHAR_LIMIT;
     537           0 :                 break;
     538             :             }
     539             :         }
     540             :     }
     541             : 
     542           0 :     if (0 != (This->fChecks & USPOOF_INVISIBLE)) {
     543             :         // This check needs to be done on NFD input
     544           0 :         UnicodeString nfdText;
     545           0 :         gNfdNormalizer->normalize(id, nfdText, *status);
     546           0 :         int32_t nfdLength = nfdText.length();
     547             : 
     548             :         // scan for more than one occurence of the same non-spacing mark
     549             :         // in a sequence of non-spacing marks.
     550             :         int32_t     i;
     551             :         UChar32     c;
     552           0 :         UChar32     firstNonspacingMark = 0;
     553           0 :         UBool       haveMultipleMarks = FALSE;  
     554           0 :         UnicodeSet  marksSeenSoFar;   // Set of combining marks in a single combining sequence.
     555             :         
     556           0 :         for (i=0; i<nfdLength ;) {
     557           0 :             c = nfdText.char32At(i);
     558           0 :             i += U16_LENGTH(c);
     559           0 :             if (u_charType(c) != U_NON_SPACING_MARK) {
     560           0 :                 firstNonspacingMark = 0;
     561           0 :                 if (haveMultipleMarks) {
     562           0 :                     marksSeenSoFar.clear();
     563           0 :                     haveMultipleMarks = FALSE;
     564             :                 }
     565           0 :                 continue;
     566             :             }
     567           0 :             if (firstNonspacingMark == 0) {
     568           0 :                 firstNonspacingMark = c;
     569           0 :                 continue;
     570             :             }
     571           0 :             if (!haveMultipleMarks) {
     572           0 :                 marksSeenSoFar.add(firstNonspacingMark);
     573           0 :                 haveMultipleMarks = TRUE;
     574             :             }
     575           0 :             if (marksSeenSoFar.contains(c)) {
     576             :                 // report the error, and stop scanning.
     577             :                 // No need to find more than the first failure.
     578           0 :                 result |= USPOOF_INVISIBLE;
     579           0 :                 break;
     580             :             }
     581           0 :             marksSeenSoFar.add(c);
     582             :         }
     583             :     }
     584             : 
     585           0 :     checkResult->fChecks = result;
     586           0 :     return checkResult->toCombinedBitmask(This->fChecks);
     587             : }
     588             : 
     589             : U_CAPI int32_t U_EXPORT2
     590           0 : uspoof_check2UnicodeString(const USpoofChecker *sc,
     591             :                           const icu::UnicodeString &id,
     592             :                           USpoofCheckResult* checkResult,
     593             :                           UErrorCode *status) {
     594           0 :     const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
     595           0 :     if (This == NULL) {
     596           0 :         return FALSE;
     597             :     }
     598             : 
     599           0 :     if (checkResult != NULL) {
     600           0 :         CheckResult* ThisCheckResult = CheckResult::validateThis(checkResult, *status);
     601           0 :         if (ThisCheckResult == NULL) {
     602           0 :             return FALSE;
     603             :         }
     604           0 :         return checkImpl(This, id, ThisCheckResult, status);
     605             :     } else {
     606             :         // Stack-allocate the checkResult since this method doesn't return it
     607           0 :         CheckResult stackCheckResult;
     608           0 :         return checkImpl(This, id, &stackCheckResult, status);
     609             :     }
     610             : }
     611             : 
     612             : 
     613             : U_CAPI int32_t U_EXPORT2
     614           0 : uspoof_getSkeleton(const USpoofChecker *sc,
     615             :                    uint32_t type,
     616             :                    const UChar *id,  int32_t length,
     617             :                    UChar *dest, int32_t destCapacity,
     618             :                    UErrorCode *status) {
     619             : 
     620           0 :     SpoofImpl::validateThis(sc, *status);
     621           0 :     if (U_FAILURE(*status)) {
     622           0 :         return 0;
     623             :     }
     624           0 :     if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=NULL)) {
     625           0 :         *status = U_ILLEGAL_ARGUMENT_ERROR;
     626           0 :         return 0;
     627             :     }
     628             : 
     629           0 :     UnicodeString idStr((length==-1), id, length);  // Aliasing constructor
     630           0 :     UnicodeString destStr;
     631           0 :     uspoof_getSkeletonUnicodeString(sc, type, idStr, destStr, status);
     632           0 :     destStr.extract(dest, destCapacity, *status);
     633           0 :     return destStr.length();
     634             : }
     635             : 
     636             : 
     637             : 
     638             : U_I18N_API UnicodeString &  U_EXPORT2
     639           0 : uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
     640             :                                 uint32_t /*type*/,
     641             :                                 const UnicodeString &id,
     642             :                                 UnicodeString &dest,
     643             :                                 UErrorCode *status) {
     644           0 :     const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
     645           0 :     if (U_FAILURE(*status)) {
     646           0 :         return dest;
     647             :     }
     648             : 
     649           0 :     UnicodeString nfdId;
     650           0 :     gNfdNormalizer->normalize(id, nfdId, *status);
     651             : 
     652             :     // Apply the skeleton mapping to the NFD normalized input string
     653             :     // Accumulate the skeleton, possibly unnormalized, in a UnicodeString.
     654           0 :     int32_t inputIndex = 0;
     655           0 :     UnicodeString skelStr;
     656           0 :     int32_t normalizedLen = nfdId.length();
     657           0 :     for (inputIndex=0; inputIndex < normalizedLen; ) {
     658           0 :         UChar32 c = nfdId.char32At(inputIndex);
     659           0 :         inputIndex += U16_LENGTH(c);
     660           0 :         This->fSpoofData->confusableLookup(c, skelStr);
     661             :     }
     662             : 
     663           0 :     gNfdNormalizer->normalize(skelStr, dest, *status);
     664           0 :     return dest;
     665             : }
     666             : 
     667             : 
     668             : U_CAPI int32_t U_EXPORT2
     669           0 : uspoof_getSkeletonUTF8(const USpoofChecker *sc,
     670             :                        uint32_t type,
     671             :                        const char *id,  int32_t length,
     672             :                        char *dest, int32_t destCapacity,
     673             :                        UErrorCode *status) {
     674           0 :     SpoofImpl::validateThis(sc, *status);
     675           0 :     if (U_FAILURE(*status)) {
     676           0 :         return 0;
     677             :     }
     678           0 :     if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=NULL)) {
     679           0 :         *status = U_ILLEGAL_ARGUMENT_ERROR;
     680           0 :         return 0;
     681             :     }
     682             : 
     683           0 :     UnicodeString srcStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : uprv_strlen(id)));
     684           0 :     UnicodeString destStr;
     685           0 :     uspoof_getSkeletonUnicodeString(sc, type, srcStr, destStr, status);
     686           0 :     if (U_FAILURE(*status)) {
     687           0 :         return 0;
     688             :     }
     689             : 
     690           0 :     int32_t lengthInUTF8 = 0;
     691           0 :     u_strToUTF8(dest, destCapacity, &lengthInUTF8,
     692           0 :                 destStr.getBuffer(), destStr.length(), status);
     693           0 :     return lengthInUTF8;
     694             : }
     695             : 
     696             : 
     697             : U_CAPI int32_t U_EXPORT2
     698           0 : uspoof_serialize(USpoofChecker *sc,void *buf, int32_t capacity, UErrorCode *status) {
     699           0 :     SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
     700           0 :     if (This == NULL) {
     701           0 :         U_ASSERT(U_FAILURE(*status));
     702           0 :         return 0;
     703             :     }
     704             : 
     705           0 :     return This->fSpoofData->serialize(buf, capacity, *status);
     706             : }
     707             : 
     708             : U_CAPI const USet * U_EXPORT2
     709           0 : uspoof_getInclusionSet(UErrorCode *status) {
     710           0 :     umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
     711           0 :     return gInclusionSet->toUSet();
     712             : }
     713             : 
     714             : U_CAPI const USet * U_EXPORT2
     715           0 : uspoof_getRecommendedSet(UErrorCode *status) {
     716           0 :     umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
     717           0 :     return gRecommendedSet->toUSet();
     718             : }
     719             : 
     720             : U_I18N_API const UnicodeSet * U_EXPORT2
     721           0 : uspoof_getInclusionUnicodeSet(UErrorCode *status) {
     722           0 :     umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
     723           0 :     return gInclusionSet;
     724             : }
     725             : 
     726             : U_I18N_API const UnicodeSet * U_EXPORT2
     727           0 : uspoof_getRecommendedUnicodeSet(UErrorCode *status) {
     728           0 :     umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
     729           0 :     return gRecommendedSet;
     730             : }
     731             : 
     732             : //------------------
     733             : // CheckResult APIs
     734             : //------------------
     735             : 
     736             : U_CAPI USpoofCheckResult* U_EXPORT2
     737           0 : uspoof_openCheckResult(UErrorCode *status) {
     738           0 :     CheckResult* checkResult = new CheckResult();
     739           0 :     if (checkResult == NULL) {
     740           0 :         *status = U_MEMORY_ALLOCATION_ERROR;
     741           0 :         return NULL;
     742             :     }
     743           0 :     return checkResult->asUSpoofCheckResult();
     744             : }
     745             : 
     746             : U_CAPI void U_EXPORT2
     747           0 : uspoof_closeCheckResult(USpoofCheckResult* checkResult) {
     748           0 :     UErrorCode status = U_ZERO_ERROR;
     749           0 :     CheckResult* This = CheckResult::validateThis(checkResult, status);
     750           0 :     delete This;
     751           0 : }
     752             : 
     753             : U_CAPI int32_t U_EXPORT2
     754           0 : uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status) {
     755           0 :     const CheckResult* This = CheckResult::validateThis(checkResult, *status);
     756           0 :     if (U_FAILURE(*status)) { return 0; }
     757           0 :     return This->fChecks;
     758             : }
     759             : 
     760             : U_CAPI URestrictionLevel U_EXPORT2
     761           0 : uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status) {
     762           0 :     const CheckResult* This = CheckResult::validateThis(checkResult, *status);
     763           0 :     if (U_FAILURE(*status)) { return USPOOF_UNRESTRICTIVE; }
     764           0 :     return This->fRestrictionLevel;
     765             : }
     766             : 
     767             : U_CAPI const USet* U_EXPORT2
     768           0 : uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status) {
     769           0 :     const CheckResult* This = CheckResult::validateThis(checkResult, *status);
     770           0 :     if (U_FAILURE(*status)) { return NULL; }
     771           0 :     return This->fNumerics.toUSet();
     772             : }
     773             : 
     774             : 
     775             : 
     776             : #endif // !UCONFIG_NO_NORMALIZATION

Generated by: LCOV version 1.13