LCOV - output.info - intl/icu/source/common/uts46.cpp

LCOV - code coverage report

Current view:	top level - intl/icu/source/common - uts46.cpp (source / functions)		Hit	Total	Coverage
Test:	output.info	Lines:	11	713	1.5 %
Date:	2017-07-14 16:53:18	Functions:	3	44	6.8 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : *   Copyright (C) 2010-2015, International Business Machines
       6             : *   Corporation and others.  All Rights Reserved.
       7             : *******************************************************************************
       8             : *   file name:  uts46.cpp
       9             : *   encoding:   UTF-8
      10             : *   tab size:   8 (not used)
      11             : *   indentation:4
      12             : *
      13             : *   created on: 2010mar09
      14             : *   created by: Markus W. Scherer
      15             : */
      16             : 
      17             : #include "unicode/utypes.h"
      18             : 
      19             : #if !UCONFIG_NO_IDNA
      20             : 
      21             : #include "unicode/idna.h"
      22             : #include "unicode/normalizer2.h"
      23             : #include "unicode/uscript.h"
      24             : #include "unicode/ustring.h"
      25             : #include "unicode/utf16.h"
      26             : #include "cmemory.h"
      27             : #include "cstring.h"
      28             : #include "punycode.h"
      29             : #include "ubidi_props.h"
      30             : #include "ustr_imp.h"
      31             : 
      32             : // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG:
      33             : //
      34             : // The domain name length limit is 255 octets in an internal DNS representation
      35             : // where the last ("root") label is the empty label
      36             : // represented by length byte 0 alone.
      37             : // In a conventional string, this translates to 253 characters, or 254
      38             : // if there is a trailing dot for the root label.
      39             : 
      40             : U_NAMESPACE_BEGIN
      41             : 
      42             : // Severe errors which usually result in a U+FFFD replacement character in the result string.
      43             : const uint32_t severeErrors=  // Bit mask of UIDNA_ERROR_* flags; process() and processUTF8() skip their final BiDi check when any of these bits is set in info.errors.
      44             :     UIDNA_ERROR_LEADING_COMBINING_MARK|
      45             :     UIDNA_ERROR_DISALLOWED|
      46             :     UIDNA_ERROR_PUNYCODE|
      47             :     UIDNA_ERROR_LABEL_HAS_DOT|
      48             :     UIDNA_ERROR_INVALID_ACE_LABEL;
      49             : 
      50             : static inline UBool  // Returns TRUE when no UTF-16 code unit of dest is above 0x7f (so also for an empty string), else FALSE.
      51           0 : isASCIIString(const UnicodeString &dest) {
      52           0 :     const UChar *s=dest.getBuffer();
      53           0 :     const UChar *limit=s+dest.length();  // One past the last code unit.
      54           0 :     while(s<limit) {
      55           0 :         if(*s++>0x7f) {  // Test the current unit, then step to the next one.
      56           0 :             return FALSE;
      57             :         }
      58             :     }
      59           0 :     return TRUE;
      60             : }
      61             : 
      62             : static UBool  // Forward declaration (UTF-16 overload); process() calls it on the first labelStart units of its output.
      63             : isASCIIOkBiDi(const UChar *s, int32_t length);
      64             : 
      65             : static UBool  // Forward declaration (char overload); processUTF8() calls it on the first labelStart bytes of its input.
      66             : isASCIIOkBiDi(const char *s, int32_t length);
      67             : 
      68             : // IDNA class default implementations -------------------------------------- ***
      69             : 
      70           0 : IDNA::~IDNA() {}  // Out-of-line destructor definition; the body is empty.
      71             : 
      72             : void  // Default UTF-8 variant: converts label to UTF-16, calls labelToASCII(), and writes the result to dest as UTF-8.
      73           0 : IDNA::labelToASCII_UTF8(StringPiece label, ByteSink &dest,
      74             :                         IDNAInfo &info, UErrorCode &errorCode) const {
      75           0 :     if(U_SUCCESS(errorCode)) {  // Nothing is written to dest if errorCode already indicates a failure.
      76           0 :         UnicodeString destString;
      77           0 :         labelToASCII(UnicodeString::fromUTF8(label), destString,
      78           0 :                      info, errorCode).toUTF8(dest);
      79             :     }
      80           0 : }
      81             : 
      82             : void  // Default UTF-8 variant: converts label to UTF-16, calls labelToUnicode(), and writes the result to dest as UTF-8.
      83           0 : IDNA::labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
      84             :                          IDNAInfo &info, UErrorCode &errorCode) const {
      85           0 :     if(U_SUCCESS(errorCode)) {  // Nothing is written to dest if errorCode already indicates a failure.
      86           0 :         UnicodeString destString;
      87           0 :         labelToUnicode(UnicodeString::fromUTF8(label), destString,
      88           0 :                        info, errorCode).toUTF8(dest);
      89             :     }
      90           0 : }
      91             : 
      92             : void  // Default UTF-8 variant: converts name to UTF-16, calls nameToASCII(), and writes the result to dest as UTF-8.
      93           0 : IDNA::nameToASCII_UTF8(StringPiece name, ByteSink &dest,
      94             :                        IDNAInfo &info, UErrorCode &errorCode) const {
      95           0 :     if(U_SUCCESS(errorCode)) {  // Nothing is written to dest if errorCode already indicates a failure.
      96           0 :         UnicodeString destString;
      97           0 :         nameToASCII(UnicodeString::fromUTF8(name), destString,
      98           0 :                     info, errorCode).toUTF8(dest);
      99             :     }
     100           0 : }
     101             : 
     102             : void  // Default UTF-8 variant: converts name to UTF-16, calls nameToUnicode(), and writes the result to dest as UTF-8.
     103           0 : IDNA::nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
     104             :                         IDNAInfo &info, UErrorCode &errorCode) const {
     105           0 :     if(U_SUCCESS(errorCode)) {  // Nothing is written to dest if errorCode already indicates a failure.
     106           0 :         UnicodeString destString;
     107           0 :         nameToUnicode(UnicodeString::fromUTF8(name), destString,
     108           0 :                       info, errorCode).toUTF8(dest);
     109             :     }
     110           0 : }
     111             : 
     112             : // UTS46 class declaration ------------------------------------------------- ***
     113             : 
     114             : class UTS46 : public IDNA {  // Concrete IDNA subclass; instances are created by IDNA::createUTS46Instance().
     115             : public:
     116             :     UTS46(uint32_t options, UErrorCode &errorCode);
     117             :     virtual ~UTS46();
     118             : 
     119             :     virtual UnicodeString &  // The four UTF-16 entry points below all forward to process() with different isLabel/toASCII flags.
     120             :     labelToASCII(const UnicodeString &label, UnicodeString &dest,
     121             :                  IDNAInfo &info, UErrorCode &errorCode) const;
     122             : 
     123             :     virtual UnicodeString &
     124             :     labelToUnicode(const UnicodeString &label, UnicodeString &dest,
     125             :                    IDNAInfo &info, UErrorCode &errorCode) const;
     126             : 
     127             :     virtual UnicodeString &
     128             :     nameToASCII(const UnicodeString &name, UnicodeString &dest,
     129             :                 IDNAInfo &info, UErrorCode &errorCode) const;
     130             : 
     131             :     virtual UnicodeString &
     132             :     nameToUnicode(const UnicodeString &name, UnicodeString &dest,
     133             :                   IDNAInfo &info, UErrorCode &errorCode) const;
     134             : 
     135             :     virtual void  // The four UTF-8 entry points below all forward to processUTF8() with different isLabel/toASCII flags.
     136             :     labelToASCII_UTF8(StringPiece label, ByteSink &dest,
     137             :                       IDNAInfo &info, UErrorCode &errorCode) const;
     138             : 
     139             :     virtual void
     140             :     labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
     141             :                        IDNAInfo &info, UErrorCode &errorCode) const;
     142             : 
     143             :     virtual void
     144             :     nameToASCII_UTF8(StringPiece name, ByteSink &dest,
     145             :                      IDNAInfo &info, UErrorCode &errorCode) const;
     146             : 
     147             :     virtual void
     148             :     nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
     149             :                       IDNAInfo &info, UErrorCode &errorCode) const;
     150             : 
     151             : private:
     152             :     UnicodeString &  // UTF-16 driver: ASCII fastpath, then processUnicode() for whatever the fastpath could not finish.
     153             :     process(const UnicodeString &src,
     154             :             UBool isLabel, UBool toASCII,
     155             :             UnicodeString &dest,
     156             :             IDNAInfo &info, UErrorCode &errorCode) const;
     157             : 
     158             :     void  // UTF-8 driver: ASCII fastpath for inputs of at most 256 bytes, otherwise converts to UTF-16 and calls processUnicode().
     159             :     processUTF8(StringPiece src,
     160             :                 UBool isLabel, UBool toASCII,
     161             :                 ByteSink &dest,
     162             :                 IDNAInfo &info, UErrorCode &errorCode) const;
     163             : 
     164             :     UnicodeString &  // Normalizes src from mappingStart into dest, then walks dest label by label starting at labelStart.
     165             :     processUnicode(const UnicodeString &src,
     166             :                    int32_t labelStart, int32_t mappingStart,
     167             :                    UBool isLabel, UBool toASCII,
     168             :                    UnicodeString &dest,
     169             :                    IDNAInfo &info, UErrorCode &errorCode) const;
     170             : 
     171             :     // returns the new dest.length()
     172             :     int32_t
     173             :     mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart,
     174             :                 UErrorCode &errorCode) const;
     175             : 
     176             :     // returns the new label length
     177             :     int32_t
     178             :     processLabel(UnicodeString &dest,
     179             :                  int32_t labelStart, int32_t labelLength,
     180             :                  UBool toASCII,
     181             :                  IDNAInfo &info, UErrorCode &errorCode) const;
     182             :     int32_t
     183             :     markBadACELabel(UnicodeString &dest,
     184             :                     int32_t labelStart, int32_t labelLength,
     185             :                     UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const;
     186             : 
     187             :     void
     188             :     checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const;
     189             : 
     190             :     UBool
     191             :     isLabelOkContextJ(const UChar *label, int32_t labelLength) const;
     192             : 
     193             :     void
     194             :     checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const;
     195             : 
     196             :     const Normalizer2 &uts46Norm2;  // uts46.nrm
     197             :     uint32_t options;  // Option bits given to the constructor; tested against UIDNA_USE_STD3_RULES and the UIDNA_NONTRANSITIONAL_TO_* flags.
     198             : };
     199             : 
     200             : IDNA *  // Factory: returns a UTS46 object allocated with new, or NULL if errorCode is (or becomes) a failure.
     201           3 : IDNA::createUTS46Instance(uint32_t options, UErrorCode &errorCode) {
     202           3 :     if(U_SUCCESS(errorCode)) {
     203           3 :         IDNA *idna=new UTS46(options, errorCode);
     204           3 :         if(idna==NULL) {
     205           0 :             errorCode=U_MEMORY_ALLOCATION_ERROR;  // Allocation itself failed.
     206           3 :         } else if(U_FAILURE(errorCode)) {  // The constructor reported a failure through errorCode.
     207           0 :             delete idna;
     208           0 :             idna=NULL;
     209             :         }
     210           3 :         return idna;
     211             :     } else {
     212           0 :         return NULL;  // errorCode was already a failure on entry; nothing is allocated.
     213             :     }
     214             : }
     215             : 
     216             : // UTS46 implementation ---------------------------------------------------- ***
     217             : 
     218           3 : UTS46::UTS46(uint32_t opt, UErrorCode &errorCode)  // Binds the "uts46" Normalizer2 instance (UNORM2_COMPOSE) and stores the option bits.
     219           3 :         : uts46Norm2(*Normalizer2::getInstance(NULL, "uts46", UNORM2_COMPOSE, errorCode)),  // NOTE(review): the result is dereferenced unconditionally; presumably getInstance() can return NULL on failure -- confirm. createUTS46Instance() deletes the object when errorCode reports a failure.
     220           6 :           options(opt) {}
     221             : 
     222           0 : UTS46::~UTS46() {}  // Empty body; uts46Norm2 is held by reference and is not deleted here.
     223             : 
     224             : UnicodeString &  // Single label, ToASCII direction: process() with isLabel=TRUE, toASCII=TRUE. Returns dest.
     225           0 : UTS46::labelToASCII(const UnicodeString &label, UnicodeString &dest,
     226             :                     IDNAInfo &info, UErrorCode &errorCode) const {
     227           0 :     return process(label, TRUE, TRUE, dest, info, errorCode);
     228             : }
     229             : 
     230             : UnicodeString &  // Single label, ToUnicode direction: process() with isLabel=TRUE, toASCII=FALSE. Returns dest.
     231           0 : UTS46::labelToUnicode(const UnicodeString &label, UnicodeString &dest,
     232             :                       IDNAInfo &info, UErrorCode &errorCode) const {
     233           0 :     return process(label, TRUE, FALSE, dest, info, errorCode);
     234             : }
     235             : 
     236             : UnicodeString &  // Whole domain name, ToASCII direction: process() with isLabel=FALSE, toASCII=TRUE, plus a total-length check on the result. Returns dest.
     237           0 : UTS46::nameToASCII(const UnicodeString &name, UnicodeString &dest,
     238             :                    IDNAInfo &info, UErrorCode &errorCode) const {
     239           0 :     process(name, FALSE, TRUE, dest, info, errorCode);
     240           0 :     if( dest.length()>=254 && (info.errors&UIDNA_ERROR_DOMAIN_NAME_TOO_LONG)==0 &&  // Only look further if the flag is not already set.
     241           0 :         isASCIIString(dest) &&  // The length rule is applied only to an all-ASCII result.
     242           0 :         (dest.length()>254 || dest[253]!=0x2e)  // Exactly 254 units is acceptable only when the last one is a dot (the trailing root-label dot).
     243             :     ) {
     244           0 :         info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
     245             :     }
     246           0 :     return dest;
     247             : }
     248             : 
     249             : UnicodeString &  // Whole domain name, ToUnicode direction: process() with isLabel=FALSE, toASCII=FALSE. Returns dest.
     250           0 : UTS46::nameToUnicode(const UnicodeString &name, UnicodeString &dest,
     251             :                      IDNAInfo &info, UErrorCode &errorCode) const {
     252           0 :     return process(name, FALSE, FALSE, dest, info, errorCode);
     253             : }
     254             : 
     255             : void  // UTF-8 override for a single label, ToASCII direction: processUTF8() with isLabel=TRUE, toASCII=TRUE.
     256           0 : UTS46::labelToASCII_UTF8(StringPiece label, ByteSink &dest,
     257             :                          IDNAInfo &info, UErrorCode &errorCode) const {
     258           0 :     processUTF8(label, TRUE, TRUE, dest, info, errorCode);
     259           0 : }
     260             : 
     261             : void  // UTF-8 override for a single label, ToUnicode direction: processUTF8() with isLabel=TRUE, toASCII=FALSE.
     262           0 : UTS46::labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
     263             :                           IDNAInfo &info, UErrorCode &errorCode) const {
     264           0 :     processUTF8(label, TRUE, FALSE, dest, info, errorCode);
     265           0 : }
     266             : 
     267             : void  // UTF-8 override for a whole domain name, ToASCII direction: processUTF8() with isLabel=FALSE, toASCII=TRUE (processUTF8 does the length check itself).
     268           0 : UTS46::nameToASCII_UTF8(StringPiece name, ByteSink &dest,
     269             :                         IDNAInfo &info, UErrorCode &errorCode) const {
     270           0 :     processUTF8(name, FALSE, TRUE, dest, info, errorCode);
     271           0 : }
     272             : 
     273             : void  // UTF-8 override for a whole domain name, ToUnicode direction: processUTF8() with isLabel=FALSE, toASCII=FALSE.
     274           0 : UTS46::nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
     275             :                          IDNAInfo &info, UErrorCode &errorCode) const {
     276           0 :     processUTF8(name, FALSE, FALSE, dest, info, errorCode);
     277           0 : }
     278             : 
     279             : // UTS #46 data for ASCII characters.
     280             : // The normalizer (using uts46.nrm) maps uppercase ASCII letters to lowercase
     281             : // and passes through all other ASCII characters.
     282             : // If UIDNA_USE_STD3_RULES is set, then non-LDH characters are disallowed
     283             : // using this data.
     284             : // The ASCII fastpath also uses this data.
     285             : // Values: -1=disallowed  0=valid  1=mapped (lowercase)
     286             : static const int8_t asciiData[128]={  // Indexed by code point 0..0x7f; callers leave the fastpath before indexing with anything larger.
     287             :     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     288             :     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     289             :     // 002D..002E; valid  #  HYPHEN-MINUS..FULL STOP
     290             :     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  0, -1,
     291             :     // 0030..0039; valid  #  DIGIT ZERO..DIGIT NINE
     292             :      0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1, -1, -1, -1, -1, -1,
     293             :     // 0041..005A; mapped  #  LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
     294             :     -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     295             :      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1, -1, -1, -1,
     296             :     // 0061..007A; valid  #  LATIN SMALL LETTER A..LATIN SMALL LETTER Z
     297             :     -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     298             :      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1, -1, -1, -1, -1
     299             : };
     300             : 
     301             : UnicodeString &  // UTF-16 driver behind labelTo*/nameTo*(): validates arguments, runs an ASCII fastpath, and hands anything else to processUnicode(). Returns dest.
     302           0 : UTS46::process(const UnicodeString &src,
     303             :                UBool isLabel, UBool toASCII,
     304             :                UnicodeString &dest,
     305             :                IDNAInfo &info, UErrorCode &errorCode) const {
     306             :     // uts46Norm2.normalize() would do all of this error checking and setup,
     307             :     // but with the ASCII fastpath we do not always call it, and do not
     308             :     // call it first.
     309           0 :     if(U_FAILURE(errorCode)) {
     310           0 :         dest.setToBogus();
     311           0 :         return dest;
     312             :     }
     313           0 :     const UChar *srcArray=src.getBuffer();
     314           0 :     if(&dest==&src || srcArray==NULL) {  // dest must be a different object than src, and src must expose a buffer.
     315           0 :         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     316           0 :         dest.setToBogus();
     317           0 :         return dest;
     318             :     }
     319             :     // Arguments are fine, reset output values.
     320           0 :     dest.remove();
     321           0 :     info.reset();
     322           0 :     int32_t srcLength=src.length();
     323           0 :     if(srcLength==0) {  // Empty input is reported as an empty label; dest stays empty.
     324           0 :         info.errors|=UIDNA_ERROR_EMPTY_LABEL;
     325           0 :         return dest;
     326             :     }
     327           0 :     UChar *destArray=dest.getBuffer(srcLength);  // Writable buffer requested for srcLength units; NULL is treated as an allocation failure.
     328           0 :     if(destArray==NULL) {
     329           0 :         errorCode=U_MEMORY_ALLOCATION_ERROR;
     330           0 :         return dest;
     331             :     }
     332             :     // ASCII fastpath
     333           0 :     UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
     334           0 :     int32_t labelStart=0;  // Index of the first unit of the label currently being scanned.
     335             :     int32_t i;
     336           0 :     for(i=0;; ++i) {  // Copies ASCII into destArray until src ends (return) or a character needs the full algorithm (break).
     337           0 :         if(i==srcLength) {  // Whole input handled by the fastpath.
     338           0 :             if(toASCII) {
     339           0 :                 if((i-labelStart)>63) {
     340           0 :                     info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
     341             :                 }
     342             :                 // There is a trailing dot if labelStart==i.
     343           0 :                 if(!isLabel && i>=254 && (i>254 || labelStart<i)) {
     344           0 :                     info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
     345             :                 }
     346             :             }
     347           0 :             info.errors|=info.labelErrors;
     348           0 :             dest.releaseBuffer(i);
     349           0 :             return dest;
     350             :         }
     351           0 :         UChar c=srcArray[i];
     352           0 :         if(c>0x7f) {  // Non-ASCII: leave the fastpath.
     353           0 :             break;
     354             :         }
     355           0 :         int cData=asciiData[c];
     356           0 :         if(cData>0) {
     357           0 :             destArray[i]=c+0x20;  // Lowercase an uppercase ASCII letter.
     358           0 :         } else if(cData<0 && disallowNonLDHDot) {
     359             :             break;  // Replacing with U+FFFD can be complicated for toASCII.
     360             :         } else {
     361           0 :             destArray[i]=c;
     362           0 :             if(c==0x2d) {  // hyphen
     363           0 :                 if(i==(labelStart+3) && srcArray[i-1]==0x2d) {
     364             :                     // "??--..." is Punycode or forbidden.
     365           0 :                     ++i;  // '-' was copied to dest already
     366           0 :                     break;
     367             :                 }
     368           0 :                 if(i==labelStart) {
     369             :                     // label starts with "-"
     370           0 :                     info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN;
     371             :                 }
     372           0 :                 if((i+1)==srcLength || srcArray[i+1]==0x2e) {
     373             :                     // label ends with "-"
     374           0 :                     info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN;
     375             :                 }
     376           0 :             } else if(c==0x2e) {  // dot
     377           0 :                 if(isLabel) {
     378             :                     // Replacing with U+FFFD can be complicated for toASCII.
     379           0 :                     ++i;  // '.' was copied to dest already
     380           0 :                     break;
     381             :                 }
     382           0 :                 if(i==labelStart) {
     383           0 :                     info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
     384             :                 }
     385           0 :                 if(toASCII && (i-labelStart)>63) {
     386           0 :                     info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
     387             :                 }
     388           0 :                 info.errors|=info.labelErrors;  // Fold this label's errors into the overall set and start the next label.
     389           0 :                 info.labelErrors=0;
     390           0 :                 labelStart=i+1;
     391             :             }
     392             :         }
     393           0 :     }
     394           0 :     info.errors|=info.labelErrors;
     395           0 :     dest.releaseBuffer(i);  // Keep the i units written by the fastpath.
     396           0 :     processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode);  // Continue with the current label; src from index i onward is still unmapped.
     397           0 :     if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 &&
     398           0 :         (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(dest.getBuffer(), labelStart)))  // The first labelStart units are the labels the fastpath completed.
     399             :     ) {
     400           0 :         info.errors|=UIDNA_ERROR_BIDI;
     401             :     }
     402           0 :     return dest;
     403             : }
     404             : 
     405             : void  // UTF-8 driver behind the *UTF8 entry points: ASCII fastpath for short inputs, otherwise (or for the remainder) processUnicode() on a UTF-16 copy; output goes to the ByteSink.
     406           0 : UTS46::processUTF8(StringPiece src,
     407             :                    UBool isLabel, UBool toASCII,
     408             :                    ByteSink &dest,
     409             :                    IDNAInfo &info, UErrorCode &errorCode) const {
     410           0 :     if(U_FAILURE(errorCode)) {
     411           0 :         return;
     412             :     }
     413           0 :     const char *srcArray=src.data();
     414           0 :     int32_t srcLength=src.length();
     415           0 :     if(srcArray==NULL && srcLength!=0) {  // A NULL pointer is accepted only together with length 0.
     416           0 :         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     417           0 :         return;
     418             :     }
     419             :     // Arguments are fine, reset output values.
     420           0 :     info.reset();
     421           0 :     if(srcLength==0) {  // Empty input is reported as an empty label; nothing is appended to dest.
     422           0 :         info.errors|=UIDNA_ERROR_EMPTY_LABEL;
     423           0 :         dest.Flush();
     424           0 :         return;
     425             :     }
     426           0 :     UnicodeString destString;
     427           0 :     int32_t labelStart=0;  // Stays 0 on the non-fastpath branch; otherwise the start of the label where the fastpath stopped.
     428           0 :     if(srcLength<=256) {  // length of stackArray[]
     429             :         // ASCII fastpath
     430             :         char stackArray[256];
     431             :         int32_t destCapacity;
     432           0 :         char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20,
     433           0 :                                              stackArray, UPRV_LENGTHOF(stackArray), &destCapacity);  // stackArray is offered to the sink as scratch storage.
     434           0 :         UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
     435             :         int32_t i;
     436           0 :         for(i=0;; ++i) {  // Copies ASCII into destArray until src ends (return) or a byte needs the full algorithm (break).
     437           0 :             if(i==srcLength) {  // Whole input handled by the fastpath.
     438           0 :                 if(toASCII) {
     439           0 :                     if((i-labelStart)>63) {
     440           0 :                         info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
     441             :                     }
     442             :                     // There is a trailing dot if labelStart==i.
     443           0 :                     if(!isLabel && i>=254 && (i>254 || labelStart<i)) {
     444           0 :                         info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
     445             :                     }
     446             :                 }
     447           0 :                 info.errors|=info.labelErrors;
     448           0 :                 dest.Append(destArray, i);
     449           0 :                 dest.Flush();
     450           0 :                 return;
     451             :             }
     452           0 :             char c=srcArray[i];
     453           0 :             if((int8_t)c<0) {  // (uint8_t)c>0x7f
     454           0 :                 break;
     455             :             }
     456           0 :             int cData=asciiData[(int)c];  // Cast: gcc warns about indexing with a char.
     457           0 :             if(cData>0) {
     458           0 :                 destArray[i]=c+0x20;  // Lowercase an uppercase ASCII letter.
     459           0 :             } else if(cData<0 && disallowNonLDHDot) {
     460             :                 break;  // Replacing with U+FFFD can be complicated for toASCII.
     461             :             } else {
     462           0 :                 destArray[i]=c;
     463           0 :                 if(c==0x2d) {  // hyphen
     464           0 :                     if(i==(labelStart+3) && srcArray[i-1]==0x2d) {
     465             :                         // "??--..." is Punycode or forbidden.
     466           0 :                         break;  // i is not advanced here: mappingStart below excludes this '-', so processUnicode() takes it from src.
     467             :                     }
     468           0 :                     if(i==labelStart) {
     469             :                         // label starts with "-"
     470           0 :                         info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN;
     471             :                     }
     472           0 :                     if((i+1)==srcLength || srcArray[i+1]==0x2e) {
     473             :                         // label ends with "-"
     474           0 :                         info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN;
     475             :                     }
     476           0 :                 } else if(c==0x2e) {  // dot
     477           0 :                     if(isLabel) {
     478           0 :                         break;  // Replacing with U+FFFD can be complicated for toASCII.
     479             :                     }
     480           0 :                     if(i==labelStart) {
     481           0 :                         info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
     482             :                     }
     483           0 :                     if(toASCII && (i-labelStart)>63) {
     484           0 :                         info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
     485             :                     }
     486           0 :                     info.errors|=info.labelErrors;  // Fold this label's errors into the overall set and start the next label.
     487           0 :                     info.labelErrors=0;
     488           0 :                     labelStart=i+1;
     489             :                 }
     490             :             }
     491           0 :         }
     492           0 :         info.errors|=info.labelErrors;
     493             :         // Convert the processed ASCII prefix of the current label to UTF-16.
     494           0 :         int32_t mappingStart=i-labelStart;  // Number of already-mapped units at the start of the current label.
     495           0 :         destString=UnicodeString::fromUTF8(StringPiece(destArray+labelStart, mappingStart));
     496             :         // Output the previous ASCII labels and process the rest of src in UTF-16.
     497           0 :         dest.Append(destArray, labelStart);
     498           0 :         processUnicode(UnicodeString::fromUTF8(StringPiece(src, labelStart)), 0, mappingStart,  // The UTF-16 source starts at the current label, hence labelStart argument 0.
     499             :                        isLabel, toASCII,
     500           0 :                        destString, info, errorCode);
     501             :     } else {
     502             :         // src is too long for the ASCII fastpath implementation.
     503           0 :         processUnicode(UnicodeString::fromUTF8(src), 0, 0,
     504             :                        isLabel, toASCII,
     505           0 :                        destString, info, errorCode);
     506             :     }
     507           0 :     destString.toUTF8(dest);  // calls dest.Flush()
     508           0 :     if(toASCII && !isLabel) {
     509             :         // length==labelStart==254 means that there is a trailing dot (ok) and
     510             :         // destString is empty (do not index at 253-labelStart).
     511           0 :         int32_t length=labelStart+destString.length();  // Total output length: labels already appended plus the part in destString.
     512           0 :         if( length>=254 && isASCIIString(destString) &&
     513           0 :             (length>254 ||
     514           0 :              (labelStart<254 && destString[253-labelStart]!=0x2e))
     515             :         ) {
     516           0 :             info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
     517             :         }
     518             :     }
     519           0 :     if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 &&
     520           0 :         (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(srcArray, labelStart)))  // Examines the original input bytes of the fastpath labels, not the lowercased copy.
     521             :     ) {
     522           0 :         info.errors|=UIDNA_ERROR_BIDI;
     523             :     }
     524             : }
     525             : 
     526             : UnicodeString &  // Returns dest, also when errorCode indicates a failure.
     527           0 : UTS46::processUnicode(const UnicodeString &src,  // Normalizes src into dest, then processes dest label by label.
     528             :                       int32_t labelStart, int32_t mappingStart,  // labelStart: start of the first unprocessed label in dest; mappingStart: index in src from which to normalize (0: all of src).
     529             :                       UBool isLabel, UBool toASCII,  // isLabel: a dot does not end a label; toASCII: selects the NONTRANSITIONAL option bit and is passed on to processLabel().
     530             :                       UnicodeString &dest,
     531             :                       IDNAInfo &info, UErrorCode &errorCode) const {  // info collects the errors and the isTransDiff flag.
     532           0 :     if(mappingStart==0) {
     533           0 :         uts46Norm2.normalize(src, dest, errorCode);  // Normalize all of src into dest.
     534             :     } else {
     535           0 :         uts46Norm2.normalizeSecondAndAppend(dest, src.tempSubString(mappingStart), errorCode);  // Append the normalized rest of src to what dest already holds.
     536             :     }
     537           0 :     if(U_FAILURE(errorCode)) {
     538           0 :         return dest;
     539             :     }
     540             :     UBool doMapDevChars=  // Deviation characters get mapped only while the relevant NONTRANSITIONAL option bit is clear.
     541           0 :         toASCII ? (options&UIDNA_NONTRANSITIONAL_TO_ASCII)==0 :
     542           0 :                   (options&UIDNA_NONTRANSITIONAL_TO_UNICODE)==0;
     543           0 :     const UChar *destArray=dest.getBuffer();
     544           0 :     int32_t destLength=dest.length();
     545           0 :     int32_t labelLimit=labelStart;  // Scan position; ends up as the limit of the current label.
     546           0 :     while(labelLimit<destLength) {
     547           0 :         UChar c=destArray[labelLimit];
     548           0 :         if(c==0x2e && !isLabel) {  // A dot (U+002E) ends the current label unless isLabel is set.
     549           0 :             int32_t labelLength=labelLimit-labelStart;
     550           0 :             int32_t newLength=processLabel(dest, labelStart, labelLength,
     551           0 :                                             toASCII, info, errorCode);
     552           0 :             info.errors|=info.labelErrors;  // Fold this label's errors into the overall result ...
     553           0 :             info.labelErrors=0;  // ... and start clean for the next label.
     554           0 :             if(U_FAILURE(errorCode)) {
     555           0 :                 return dest;
     556             :             }
     557           0 :             destArray=dest.getBuffer();  // processLabel() takes dest by non-const reference; re-fetch the buffer pointer.
     558           0 :             destLength+=newLength-labelLength;  // Account for the label having grown or shrunk.
     559           0 :             labelLimit=labelStart+=newLength+1;  // Continue after the label and its dot.
     560           0 :         } else if(0xdf<=c && c<=0x200d && (c==0xdf || c==0x3c2 || c>=0x200c)) {  // c is U+00DF, U+03C2, U+200C or U+200D.
     561           0 :             info.isTransDiff=TRUE;
     562           0 :             if(doMapDevChars) {
     563           0 :                 destLength=mapDevChars(dest, labelStart, labelLimit, errorCode);  // Maps from labelLimit on; returns the new length of dest.
     564           0 :                 if(U_FAILURE(errorCode)) {
     565           0 :                     return dest;
     566             :                 }
     567           0 :                 destArray=dest.getBuffer();
     568             :                 // Do not increment labelLimit in case c was removed.
     569             :                 // All deviation characters have been mapped, no need to check for them again.
     570           0 :                 doMapDevChars=FALSE;
     571             :             } else {
     572           0 :                 ++labelLimit;
     573             :             }
     574             :         } else {
     575           0 :             ++labelLimit;
     576             :         }
     577             :     }
     578             :     // Permit an empty label at the end (0<labelStart==labelLimit==destLength is ok)
     579             :     // but not an empty label elsewhere nor a completely empty domain name.
     580             :     // processLabel() sets UIDNA_ERROR_EMPTY_LABEL when labelLength==0.
     581           0 :     if(0==labelStart || labelStart<labelLimit) {
     582           0 :         processLabel(dest, labelStart, labelLimit-labelStart,
     583           0 :                       toASCII, info, errorCode);
     584           0 :         info.errors|=info.labelErrors;  // Unlike in the loop, labelErrors is not reset here.
     585             :     }
     586           0 :     return dest;
     587             : }
     588             : 
     589             : int32_t  // Returns the length of dest after mapping; 0 if errorCode already indicated a failure on entry.
     590           0 : UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart,  // Rewrites dest in place from mappingStart on, then re-normalizes it from labelStart on.
     591             :                    UErrorCode &errorCode) const {
     592           0 :     if(U_FAILURE(errorCode)) {
     593           0 :         return 0;
     594             :     }
     595           0 :     int32_t length=dest.length();
     596           0 :     UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length);  // Ask for one extra unit up front if the first character to map is sharp s (it becomes two units).
     597           0 :     if(s==NULL) {
     598           0 :         errorCode=U_MEMORY_ALLOCATION_ERROR;
     599           0 :         return length;
     600             :     }
     601           0 :     int32_t capacity=dest.getCapacity();
     602           0 :     UBool didMapDevChars=FALSE;
     603           0 :     int32_t readIndex=mappingStart, writeIndex=mappingStart;  // readIndex runs ahead of writeIndex once a character has been removed.
     604           0 :     do {
     605           0 :         UChar c=s[readIndex++];
     606           0 :         switch(c) {
     607             :         case 0xdf:
     608             :             // Map sharp s to ss.
     609           0 :             didMapDevChars=TRUE;
     610           0 :             s[writeIndex++]=0x73;  // Replace sharp s with first s.
     611             :             // Insert second s and account for possible buffer reallocation.
     612           0 :             if(writeIndex==readIndex) {  // No gap left by an earlier removal: room must be made for the second s.
     613           0 :                 if(length==capacity) {
     614           0 :                     dest.releaseBuffer(length);
     615           0 :                     s=dest.getBuffer(length+1);
     616           0 :                     if(s==NULL) {
     617           0 :                         errorCode=U_MEMORY_ALLOCATION_ERROR;
     618           0 :                         return length;
     619             :                     }
     620           0 :                     capacity=dest.getCapacity();
     621             :                 }
     622           0 :                 u_memmove(s+writeIndex+1, s+writeIndex, length-writeIndex);  // Shift the unread tail right by one unit.
     623           0 :                 ++readIndex;  // The unread tail moved, so the read position moves with it.
     624             :             }
     625           0 :             s[writeIndex++]=0x73;
     626           0 :             ++length;
     627           0 :             break;
     628             :         case 0x3c2:  // Map final sigma to nonfinal sigma.
     629           0 :             didMapDevChars=TRUE;
     630           0 :             s[writeIndex++]=0x3c3;
     631           0 :             break;
     632             :         case 0x200c:  // Ignore/remove ZWNJ.
     633             :         case 0x200d:  // Ignore/remove ZWJ.
     634           0 :             didMapDevChars=TRUE;
     635           0 :             --length;  // Nothing is written; writeIndex stays put so that later units close the gap.
     636           0 :             break;
     637             :         default:
     638             :             // Only really necessary if writeIndex was different from readIndex.
     639           0 :             s[writeIndex++]=c;
     640           0 :             break;
     641             :         }
     642           0 :     } while(writeIndex<length);  // Done once the write position reaches the adjusted length.
     643           0 :     dest.releaseBuffer(length);
     644           0 :     if(didMapDevChars) {
     645             :         // Mapping deviation characters might have resulted in an un-NFC string.
     646             :         // We could use either the NFC or the UTS #46 normalizer.
     647             :         // By using the UTS #46 normalizer again, we avoid having to load a second .nrm data file.
     648           0 :         UnicodeString normalized;
     649           0 :         uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCode);
     650           0 :         if(U_SUCCESS(errorCode)) {
     651           0 :             dest.replace(labelStart, 0x7fffffff, normalized);  // Swap the tail that starts at labelStart for its normalized form.
     652           0 :             if(dest.isBogus()) {
     653           0 :                 errorCode=U_MEMORY_ALLOCATION_ERROR;
     654             :             }
     655           0 :             return dest.length();
     656             :         }
     657             :     }
     658           0 :     return length;  // Also reached when the re-normalization failed; errorCode then carries the failure.
     659             : }
     660             : 
     661             : // Some non-ASCII characters are equivalent to sequences with
     662             : // non-LDH ASCII characters. To find them:
     663             : // grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt)
     664             : static inline UBool  // Returns TRUE exactly for the three code points listed below.
     665           0 : isNonASCIIDisallowedSTD3Valid(UChar32 c) {
     666           0 :     return c==0x2260 || c==0x226E || c==0x226F;  // U+2260 NOT EQUAL TO, U+226E NOT LESS-THAN, U+226F NOT GREATER-THAN
     667             : }
     668             : 
     669             : // Replace the label in dest with the label string, if the label was modified.
     670             : // If &label==&dest then the label was modified in-place and labelLength
     671             : // is the new label length, different from label.length().
     672             : // If &label!=&dest then labelLength==label.length().
     673             : // Returns labelLength (= the new label length), or 0 if errorCode is or becomes a failure.
     674             : static int32_t
     675           0 : replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLength,
     676             :              const UnicodeString &label, int32_t labelLength, UErrorCode &errorCode) {
     677           0 :     if(U_FAILURE(errorCode)) {
     678           0 :         return 0;
     679             :     }
     680           0 :     if(&label!=&dest) {  // The label lives in a separate string: copy it over the old label in dest.
     681           0 :         dest.replace(destLabelStart, destLabelLength, label);
     682           0 :         if(dest.isBogus()) {  // A bogus dest after replace() is reported as an allocation failure.
     683           0 :             errorCode=U_MEMORY_ALLOCATION_ERROR;
     684           0 :             return 0;
     685             :         }
     686             :     }
     687           0 :     return labelLength;
     688             : }
     689             : 
     690             : int32_t  // Returns the resulting length of the label in dest; 0 if errorCode already indicated a failure on entry.
     691           0 : UTS46::processLabel(UnicodeString &dest,  // Validates one label of dest in place; for toASCII, Punycode-encodes a label that has non-ASCII characters.
     692             :                     int32_t labelStart, int32_t labelLength,  // The label is dest[labelStart, labelStart+labelLength).
     693             :                     UBool toASCII,
     694             :                     IDNAInfo &info, UErrorCode &errorCode) const {  // Problems found are ORed into info.labelErrors.
     695           0 :     if(U_FAILURE(errorCode)) {
     696           0 :         return 0;
     697             :     }
     698           0 :     UnicodeString fromPunycode;  // Receives the decoded text when the label starts with "xn--".
     699             :     UnicodeString *labelString;  // The string that label points into: &dest or &fromPunycode.
     700           0 :     const UChar *label=dest.getBuffer()+labelStart;
     701           0 :     int32_t destLabelStart=labelStart;  // Position of the label in dest, kept because labelStart may be redirected below.
     702           0 :     int32_t destLabelLength=labelLength;  // Length of the label in dest, kept because labelLength may be redirected below.
     703             :     UBool wasPunycode;
     704           0 :     if(labelLength>=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && label[3]==0x2d) {
     705             :         // Label starts with "xn--", try to un-Punycode it.
     706           0 :         wasPunycode=TRUE;
     707           0 :         UChar *unicodeBuffer=fromPunycode.getBuffer(-1);  // capacity==-1: most labels should fit
     708           0 :         if(unicodeBuffer==NULL) {
     709             :             // Should never occur if we used capacity==-1 which uses the internal buffer.
     710           0 :             errorCode=U_MEMORY_ALLOCATION_ERROR;
     711           0 :             return labelLength;
     712             :         }
     713           0 :         UErrorCode punycodeErrorCode=U_ZERO_ERROR;  // Separate code: a Punycode failure marks the label, it does not fail the whole call.
     714           0 :         int32_t unicodeLength=u_strFromPunycode(label+4, labelLength-4,
     715             :                                                 unicodeBuffer, fromPunycode.getCapacity(),
     716           0 :                                                 NULL, &punycodeErrorCode);
     717           0 :         if(punycodeErrorCode==U_BUFFER_OVERFLOW_ERROR) {  // Did not fit: retry with a buffer of the reported length.
     718           0 :             fromPunycode.releaseBuffer(0);
     719           0 :             unicodeBuffer=fromPunycode.getBuffer(unicodeLength);
     720           0 :             if(unicodeBuffer==NULL) {
     721           0 :                 errorCode=U_MEMORY_ALLOCATION_ERROR;
     722           0 :                 return labelLength;
     723             :             }
     724           0 :             punycodeErrorCode=U_ZERO_ERROR;
     725           0 :             unicodeLength=u_strFromPunycode(label+4, labelLength-4,
     726             :                                             unicodeBuffer, fromPunycode.getCapacity(),
     727           0 :                                             NULL, &punycodeErrorCode);
     728             :         }
     729           0 :         fromPunycode.releaseBuffer(unicodeLength);
     730           0 :         if(U_FAILURE(punycodeErrorCode)) {
     731           0 :             info.labelErrors|=UIDNA_ERROR_PUNYCODE;
     732           0 :             return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode);
     733             :         }
     734             :         // Check for NFC, and for characters that are not
     735             :         // valid or deviation characters according to the normalizer.
     736             :         // If there is something wrong, then the string will change.
     737             :         // Note that the normalizer passes through non-LDH ASCII and deviation characters.
     738             :         // Deviation characters are ok in Punycode even in transitional processing.
     739             :         // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES
     740             :         // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too.
     741           0 :         UBool isValid=uts46Norm2.isNormalized(fromPunycode, errorCode);
     742           0 :         if(U_FAILURE(errorCode)) {
     743           0 :             return labelLength;
     744             :         }
     745           0 :         if(!isValid) {
     746           0 :             info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL;
     747           0 :             return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode);
     748             :         }
     749           0 :         labelString=&fromPunycode;  // From here on label/labelStart/labelLength describe the decoded string.
     750           0 :         label=fromPunycode.getBuffer();
     751           0 :         labelStart=0;
     752           0 :         labelLength=fromPunycode.length();
     753             :     } else {
     754           0 :         wasPunycode=FALSE;
     755           0 :         labelString=&dest;
     756             :     }
     757             :     // Validity check
     758           0 :     if(labelLength==0) {
     759           0 :         info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
     760             :         return replaceLabel(dest, destLabelStart, destLabelLength,
     761           0 :                             *labelString, labelLength, errorCode);
     762             :     }
     763             :     // labelLength>0
     764           0 :     if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) {
     765             :         // label starts with "??--"
     766           0 :         info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4;
     767             :     }
     768           0 :     if(label[0]==0x2d) {
     769             :         // label starts with "-"
     770           0 :         info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN;
     771             :     }
     772           0 :     if(label[labelLength-1]==0x2d) {
     773             :         // label ends with "-"
     774           0 :         info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN;
     775             :     }
     776             :     // If the label was not a Punycode label, then it was the result of
     777             :     // mapping, normalization and label segmentation.
     778             :     // If the label was in Punycode, then we mapped it again above
     779             :     // and checked its validity.
     780             :     // Now we handle the STD3 restriction to LDH characters (if set)
     781             :     // and we look for U+FFFD which indicates disallowed characters
     782             :     // in a non-Punycode label or U+FFFD itself in a Punycode label.
     783             :     // We also check for dots which can come from the input to a single-label function.
     784             :     // Ok to cast away const because we own the UnicodeString.
     785           0 :     UChar *s=(UChar *)label;
     786           0 :     const UChar *limit=label+labelLength;
     787           0 :     UChar oredChars=0;  // Bitwise OR of all non-ASCII code units in the label, for the quick pre-checks further below.
     788             :     // If we enforce STD3 rules, then ASCII characters other than LDH and dot are disallowed.
     789           0 :     UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
     790           0 :     do {
     791           0 :         UChar c=*s;
     792           0 :         if(c<=0x7f) {
     793           0 :             if(c==0x2e) {
     794           0 :                 info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT;
     795           0 :                 *s=0xfffd;
     796           0 :             } else if(disallowNonLDHDot && asciiData[c]<0) {  // asciiData is defined outside this view; a negative entry is treated as disallowed here.
     797           0 :                 info.labelErrors|=UIDNA_ERROR_DISALLOWED;
     798           0 :                 *s=0xfffd;
     799             :             }
     800             :         } else {
     801           0 :             oredChars|=c;
     802           0 :             if(disallowNonLDHDot && isNonASCIIDisallowedSTD3Valid(c)) {
     803           0 :                 info.labelErrors|=UIDNA_ERROR_DISALLOWED;
     804           0 :                 *s=0xfffd;
     805           0 :             } else if(c==0xfffd) {  // U+FFFD is already in place: only record the error.
     806           0 :                 info.labelErrors|=UIDNA_ERROR_DISALLOWED;
     807             :             }
     808             :         }
     809           0 :         ++s;
     810           0 :     } while(s<limit);
     811             :     // Check for a leading combining mark after other validity checks
     812             :     // so that we don't report UIDNA_ERROR_DISALLOWED for the U+FFFD from here.
     813             :     UChar32 c;
     814           0 :     int32_t cpLength=0;
     815             :     // "Unsafe" is ok because unpaired surrogates were mapped to U+FFFD.
     816           0 :     U16_NEXT_UNSAFE(label, cpLength, c);
     817           0 :     if((U_GET_GC_MASK(c)&U_GC_M_MASK)!=0) {  // The first code point has a general category covered by U_GC_M_MASK.
     818           0 :         info.labelErrors|=UIDNA_ERROR_LEADING_COMBINING_MARK;
     819           0 :         labelString->replace(labelStart, cpLength, (UChar)0xfffd);
     820           0 :         label=labelString->getBuffer()+labelStart;
     821           0 :         labelLength+=1-cpLength;  // One U+FFFD now stands in for cpLength code units.
     822           0 :         if(labelString==&dest) {  // Edited dest itself, so the label's length in dest changed as well.
     823           0 :             destLabelLength=labelLength;
     824             :         }
     825             :     }
     826           0 :     if((info.labelErrors&severeErrors)==0) {
     827             :         // Do contextual checks only if we do not have U+FFFD from a severe error
     828             :         // because U+FFFD can make these checks fail.
     829           0 :         if((options&UIDNA_CHECK_BIDI)!=0 && (!info.isBiDi || info.isOkBiDi)) {
     830           0 :             checkLabelBiDi(label, labelLength, info);
     831             :         }
     832           0 :         if( (options&UIDNA_CHECK_CONTEXTJ)!=0 && (oredChars&0x200c)==0x200c &&  // ZWNJ (0x200c) or ZWJ (0x200d) can only be present if both bits of 0x200c are set in oredChars.
     833           0 :             !isLabelOkContextJ(label, labelLength)
     834             :         ) {
     835           0 :             info.labelErrors|=UIDNA_ERROR_CONTEXTJ;
     836             :         }
     837           0 :         if((options&UIDNA_CHECK_CONTEXTO)!=0 && oredChars>=0xb7) {  // Quick pre-check: below 0xb7 the label cannot contain any code unit >=0xb7.
     838           0 :             checkLabelContextO(label, labelLength, info);
     839             :         }
     840           0 :         if(toASCII) {
     841           0 :             if(wasPunycode) {
     842             :                 // Leave a Punycode label unchanged if it has no severe errors.
     843           0 :                 if(destLabelLength>63) {
     844           0 :                     info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
     845             :                 }
     846           0 :                 return destLabelLength;
     847           0 :             } else if(oredChars>=0x80) {
     848             :                 // Contains non-ASCII characters.
     849           0 :                 UnicodeString punycode;
     850           0 :                 UChar *buffer=punycode.getBuffer(63);  // 63==maximum DNS label length
     851           0 :                 if(buffer==NULL) {
     852           0 :                     errorCode=U_MEMORY_ALLOCATION_ERROR;
     853           0 :                     return destLabelLength;
     854             :                 }
     855           0 :                 buffer[0]=0x78;  // Write "xn--".
     856           0 :                 buffer[1]=0x6e;
     857           0 :                 buffer[2]=0x2d;
     858           0 :                 buffer[3]=0x2d;
     859           0 :                 int32_t punycodeLength=u_strToPunycode(label, labelLength,
     860           0 :                                                       buffer+4, punycode.getCapacity()-4,
     861           0 :                                                       NULL, &errorCode);
     862           0 :                 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {  // Did not fit: keep the 4-unit prefix and retry with a buffer sized for the reported length.
     863           0 :                     errorCode=U_ZERO_ERROR;
     864           0 :                     punycode.releaseBuffer(4);
     865           0 :                     buffer=punycode.getBuffer(4+punycodeLength);
     866           0 :                     if(buffer==NULL) {
     867           0 :                         errorCode=U_MEMORY_ALLOCATION_ERROR;
     868           0 :                         return destLabelLength;
     869             :                     }
     870           0 :                     punycodeLength=u_strToPunycode(label, labelLength,
     871           0 :                                                   buffer+4, punycode.getCapacity()-4,
     872           0 :                                                   NULL, &errorCode);
     873             :                 }
     874           0 :                 punycodeLength+=4;  // Count the "xn--" prefix.
     875           0 :                 punycode.releaseBuffer(punycodeLength);
     876           0 :                 if(U_FAILURE(errorCode)) {
     877           0 :                     return destLabelLength;
     878             :                 }
     879           0 :                 if(punycodeLength>63) {
     880           0 :                     info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
     881             :                 }
     882             :                 return replaceLabel(dest, destLabelStart, destLabelLength,
     883           0 :                                     punycode, punycodeLength, errorCode);
     884             :             } else {
     885             :                 // all-ASCII label
     886           0 :                 if(labelLength>63) {
     887           0 :                     info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
     888             :                 }
     889             :             }
     890             :         }
     891             :     } else {
     892             :         // If a Punycode label has severe errors,
     893             :         // then leave it but make sure it does not look valid.
     894           0 :         if(wasPunycode) {
     895           0 :             info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL;
     896           0 :             return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info, errorCode);
     897             :         }
     898             :     }
     899             :     return replaceLabel(dest, destLabelStart, destLabelLength,  // Writes a decoded label back into dest; for an in-place label it only reports the length.
     900           0 :                         *labelString, labelLength, errorCode);
     901             : }
     902             : 
     903             : // Make sure an ACE label does not look valid.
     904             : // Append U+FFFD if the label has only LDH characters.
     905             : // If UIDNA_USE_STD3_RULES, also replace disallowed ASCII characters with U+FFFD.
     906             : int32_t  // Returns the label's length (labelLength+1 if U+FFFD was appended); 0 if errorCode is or becomes a failure.
     907           0 : UTS46::markBadACELabel(UnicodeString &dest,
     908             :                        int32_t labelStart, int32_t labelLength,
     909             :                        UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const {
     910           0 :     if(U_FAILURE(errorCode)) {
     911           0 :         return 0;
     912             :     }
     913           0 :     UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
     914           0 :     UBool isASCII=TRUE;  // Cleared once the part after the prefix holds a U+FFFD written here or another non-ASCII unit.
     915           0 :     UBool onlyLDH=TRUE;  // Cleared once a unit after the prefix is a dot, has asciiData[c]<0, or is non-ASCII.
     916           0 :     const UChar *label=dest.getBuffer()+labelStart;
     917             :     // Ok to cast away const because we own the UnicodeString.
     918           0 :     UChar *s=(UChar *)label+4;  // After the initial "xn--".
     919           0 :     const UChar *limit=label+labelLength;
     920           0 :     do {  // NOTE(review): the body runs before the s<limit test, so this presumes labelLength>4 - confirm against callers.
     921           0 :         UChar c=*s;
     922           0 :         if(c<=0x7f) {
     923           0 :             if(c==0x2e) {
     924           0 :                 info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT;
     925           0 :                 *s=0xfffd;
     926           0 :                 isASCII=onlyLDH=FALSE;
     927           0 :             } else if(asciiData[c]<0) {  // asciiData is defined outside this view; a negative entry counts as non-LDH here.
     928           0 :                 onlyLDH=FALSE;
     929           0 :                 if(disallowNonLDHDot) {
     930           0 :                     *s=0xfffd;
     931           0 :                     isASCII=FALSE;
     932             :                 }
     933             :             }
     934             :         } else {
     935           0 :             isASCII=onlyLDH=FALSE;
     936             :         }
     937             :     } while(++s<limit);
     938           0 :     if(onlyLDH) {  // Nothing marks the label as bad yet: append U+FFFD right after it.
     939           0 :         dest.insert(labelStart+labelLength, (UChar)0xfffd);
     940           0 :         if(dest.isBogus()) {
     941           0 :             errorCode=U_MEMORY_ALLOCATION_ERROR;
     942           0 :             return 0;
     943             :         }
     944           0 :         ++labelLength;
     945             :     } else {
     946           0 :         if(toASCII && isASCII && labelLength>63) {  // The length limit is only checked for toASCII and while the label is still all-ASCII.
     947           0 :             info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
     948             :         }
     949             :     }
     950           0 :     return labelLength;
     951             : }
     952             : 
     953             : const uint32_t L_MASK=U_MASK(U_LEFT_TO_RIGHT);  // These constants are bit sets of direction values, tested against U_MASK(u_charDirection(c)) in checkLabelBiDi().
     954             : const uint32_t R_AL_MASK=U_MASK(U_RIGHT_TO_LEFT)|U_MASK(U_RIGHT_TO_LEFT_ARABIC);
     955             : const uint32_t L_R_AL_MASK=L_MASK|R_AL_MASK;  // Directions permitted for the first character of a label.
     956             : 
     957             : const uint32_t R_AL_AN_MASK=R_AL_MASK|U_MASK(U_ARABIC_NUMBER);  // Any of these in a label sets info.isBiDi.
     958             : 
     959             : const uint32_t EN_AN_MASK=U_MASK(U_EUROPEAN_NUMBER)|U_MASK(U_ARABIC_NUMBER);
     960             : const uint32_t R_AL_EN_AN_MASK=R_AL_MASK|EN_AN_MASK;  // Directions permitted for the last non-NSM character when the first is not L.
     961             : const uint32_t L_EN_MASK=L_MASK|U_MASK(U_EUROPEAN_NUMBER);  // Directions permitted for the last non-NSM character when the first is L.
     962             : 
     963             : const uint32_t ES_CS_ET_ON_BN_NSM_MASK=
     964             :     U_MASK(U_EUROPEAN_NUMBER_SEPARATOR)|
     965             :     U_MASK(U_COMMON_NUMBER_SEPARATOR)|
     966             :     U_MASK(U_EUROPEAN_NUMBER_TERMINATOR)|
     967             :     U_MASK(U_OTHER_NEUTRAL)|
     968             :     U_MASK(U_BOUNDARY_NEUTRAL)|
     969             :     U_MASK(U_DIR_NON_SPACING_MARK);
     970             : const uint32_t L_EN_ES_CS_ET_ON_BN_NSM_MASK=L_EN_MASK|ES_CS_ET_ON_BN_NSM_MASK;  // Directions permitted inside a label whose first character is L.
     971             : const uint32_t R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK=R_AL_MASK|EN_AN_MASK|ES_CS_ET_ON_BN_NSM_MASK;  // Directions permitted inside a label whose first character is not L.
     972             : 
     973             : // We scan the whole label and check both for whether it contains RTL characters
     974             : // and whether it passes the BiDi Rule.
     975             : // In a BiDi domain name, all labels must pass the BiDi Rule, but we might find
     976             : // that a domain name is a BiDi domain name (has an RTL label) only after
     977             : // processing several earlier labels.
     978             : void
     979           0 : UTS46::checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const {  // Only ever clears info.isOkBiDi and sets info.isBiDi; it never resets either flag.
     980             :     // IDNA2008 BiDi rule
     981             :     // Get the directionality of the first character.
     982             :     UChar32 c;
     983           0 :     int32_t i=0;  // Forward index; after U16_NEXT_UNSAFE it is just past the first code point.
     984           0 :     U16_NEXT_UNSAFE(label, i, c);
     985           0 :     uint32_t firstMask=U_MASK(u_charDirection(c));
     986             :     // 1. The first character must be a character with BIDI property L, R
     987             :     // or AL.  If it has the R or AL property, it is an RTL label; if it
     988             :     // has the L property, it is an LTR label.
     989           0 :     if((firstMask&~L_R_AL_MASK)!=0) {
     990           0 :         info.isOkBiDi=FALSE;
     991             :     }
     992             :     // Get the directionality of the last non-NSM character.
     993             :     uint32_t lastMask;
     994             :     for(;;) {
     995           0 :         if(i>=labelLength) {  // Nothing is left after the first code point (labelLength shrinks as trailing NSMs are skipped).
     996           0 :             lastMask=firstMask;
     997           0 :             break;
     998             :         }
     999           0 :         U16_PREV_UNSAFE(label, labelLength, c);  // Steps labelLength back over the last code point and reads it into c.
    1000           0 :         UCharDirection dir=u_charDirection(c);
    1001           0 :         if(dir!=U_DIR_NON_SPACING_MARK) {
    1002           0 :             lastMask=U_MASK(dir);
    1003           0 :             break;
    1004             :         }
    1005           0 :     }
    1006             :     // 3. In an RTL label, the end of the label must be a character with
    1007             :     // BIDI property R, AL, EN or AN, followed by zero or more
    1008             :     // characters with BIDI property NSM.
    1009             :     // 6. In an LTR label, the end of the label must be a character with
    1010             :     // BIDI property L or EN, followed by zero or more characters with
    1011             :     // BIDI property NSM.
    1012           0 :     if( (firstMask&L_MASK)!=0 ?
    1013           0 :             (lastMask&~L_EN_MASK)!=0 :
    1014           0 :             (lastMask&~R_AL_EN_AN_MASK)!=0
    1015             :     ) {
    1016           0 :         info.isOkBiDi=FALSE;
    1017             :     }
    1018             :     // Get the directionalities of the intervening characters.
    1019           0 :     uint32_t mask=0;  // Union of the direction bits of the code points between the first one and the last non-NSM one.
    1020           0 :     while(i<labelLength) {
    1021           0 :         U16_NEXT_UNSAFE(label, i, c);
    1022           0 :         mask|=U_MASK(u_charDirection(c));
    1023             :     }
    1024           0 :     if(firstMask&L_MASK) {  // The first character is L: apply the LTR condition.
    1025             :         // 5. In an LTR label, only characters with the BIDI properties L, EN,
    1026             :         // ES, CS, ET, ON, BN and NSM are allowed.
    1027           0 :         if((mask&~L_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) {
    1028           0 :             info.isOkBiDi=FALSE;
    1029             :         }
    1030             :     } else {  // Any other first character: apply the RTL conditions.
    1031             :         // 2. In an RTL label, only characters with the BIDI properties R, AL,
    1032             :         // AN, EN, ES, CS, ET, ON, BN and NSM are allowed.
    1033           0 :         if((mask&~R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) {
    1034           0 :             info.isOkBiDi=FALSE;
    1035             :         }
    1036             :         // 4. In an RTL label, if an EN is present, no AN may be present, and
    1037             :         // vice versa.
    1038           0 :         if((mask&EN_AN_MASK)==EN_AN_MASK) {
    1039           0 :             info.isOkBiDi=FALSE;
    1040             :         }
    1041             :     }
    1042             :     // An RTL label is a label that contains at least one character of type
    1043             :     // R, AL or AN. [...]
    1044             :     // A "BIDI domain name" is a domain name that contains at least one RTL
    1045             :     // label. [...]
    1046             :     // The following rule, consisting of six conditions, applies to labels
    1047             :     // in BIDI domain names.
    1048           0 :     if(((firstMask|mask|lastMask)&R_AL_AN_MASK)!=0) {
    1049           0 :         info.isBiDi=TRUE;
    1050             :     }
    1051           0 : }
    1052             : 
    1053             : // Special code for the ASCII prefix of a BiDi domain name.
    1054             : // The ASCII prefix is all-LTR.
    1055             : 
    1056             : // IDNA2008 BiDi rule, parts relevant to ASCII labels:
    1057             : // 1. The first character must be a character with BIDI property L [...]
    1058             : // 5. In an LTR label, only characters with the BIDI properties L, EN,
    1059             : // ES, CS, ET, ON, BN and NSM are allowed.
    1060             : // 6. In an LTR label, the end of the label must be a character with
    1061             : // BIDI property L or EN [...]
    1062             : 
    1063             : // UTF-16 version, called for mapped ASCII prefix.
    1064             : // Cannot contain uppercase A-Z.
    1065             : // s[length-1] must be the trailing dot: a label's last character is only checked when its dot is reached.
    1066             : static UBool  // TRUE unless some label fails one of the three per-label checks below
    1067           0 : isASCIIOkBiDi(const UChar *s, int32_t length) {
    1068           0 :     int32_t labelStart=0;  // index at which the current label begins
    1069           0 :     for(int32_t i=0; i<length; ++i) {
    1070           0 :         UChar c=s[i];
    1071           0 :         if(c==0x2e) {  // dot
    1072           0 :             if(i>labelStart) {  // non-empty label: look back at its last character
    1073           0 :                 c=s[i-1];
    1074           0 :                 if(!(0x61<=c && c<=0x7a) && !(0x30<=c && c<=0x39)) {  // neither a-z nor 0-9
    1075             :                     // Last character in the label is not an L or EN.
    1076           0 :                     return FALSE;
    1077             :                 }
    1078             :             }
    1079           0 :             labelStart=i+1;  // the next label starts right after the dot
    1080           0 :         } else if(i==labelStart) {  // first character of a label
    1081           0 :             if(!(0x61<=c && c<=0x7a)) {  // not a-z
    1082             :                 // First character in the label is not an L.
    1083           0 :                 return FALSE;
    1084             :             }
    1085             :         } else {
    1086           0 :             if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) {  // 0x09..0x0d or 0x1c..0x20
    1087             :                 // Intermediate character in the label is a B, S or WS.
    1088           0 :                 return FALSE;
    1089             :             }
    1090             :         }
    1091             :     }
    1092           0 :     return TRUE;  // no label failed a check
    1093             : }
    1094             : 
    1095             : // UTF-8 version, called for source ASCII prefix.
    1096             : // Can contain uppercase A-Z.
    1097             : // s[length-1] must be the trailing dot: a label's last character is only checked when its dot is reached.
    1098             : static UBool  // TRUE unless some label fails one of the three per-label checks below
    1099           0 : isASCIIOkBiDi(const char *s, int32_t length) {
    1100           0 :     int32_t labelStart=0;  // index at which the current label begins
    1101           0 :     for(int32_t i=0; i<length; ++i) {
    1102           0 :         char c=s[i];
    1103           0 :         if(c==0x2e) {  // dot
    1104           0 :             if(i>labelStart) {  // non-empty label: look back at its last character
    1105           0 :                 c=s[i-1];
    1106           0 :                 if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a) && !(0x30<=c && c<=0x39)) {  // not a-z, A-Z or 0-9
    1107             :                     // Last character in the label is not an L or EN.
    1108           0 :                     return FALSE;
    1109             :                 }
    1110             :             }
    1111           0 :             labelStart=i+1;  // the next label starts right after the dot
    1112           0 :         } else if(i==labelStart) {  // first character of a label
    1113           0 :             if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a)) {  // neither a-z nor A-Z
    1114             :                 // First character in the label is not an L.
    1115           0 :                 return FALSE;
    1116             :             }
    1117             :         } else {
    1118           0 :             if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) {  // 0x09..0x0d or 0x1c..0x20
    1119             :                 // Intermediate character in the label is a B, S or WS.
    1120           0 :                 return FALSE;
    1121             :             }
    1122             :         }
    1123             :     }
    1124           0 :     return TRUE;  // no label failed a check
    1125             : }
    1126             : 
    1127             : UBool  // FALSE as soon as one U+200C/U+200D in the label fails its CONTEXTJ rule, otherwise TRUE
    1128           0 : UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
    1129           0 :     const UBiDiProps *bdp=ubidi_getSingleton();  // passed to ubidi_getJoiningType() below
    1130             :     // [IDNA2008-Tables]
    1131             :     // 200C..200D  ; CONTEXTJ    # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
    1132           0 :     for(int32_t i=0; i<labelLength; ++i) {
    1133           0 :         if(label[i]==0x200c) {
    1134             :             // Appendix A.1. ZERO WIDTH NON-JOINER
    1135             :             // Rule Set:
    1136             :             //  False;
    1137             :             //  If Canonical_Combining_Class(Before(cp)) .eq.  Virama Then True;
    1138             :             //  If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C
    1139             :             //     (Joining_Type:T)*(Joining_Type:{R,D})) Then True;
    1140           0 :             if(i==0) {  // nothing precedes the ZWNJ, so neither condition can hold
    1141           0 :                 return FALSE;
    1142             :             }
    1143             :             UChar32 c;
    1144           0 :             int32_t j=i;  // j walks backwards from the ZWNJ
    1145           0 :             U16_PREV_UNSAFE(label, j, c);  // c = Before(cp)
    1146           0 :             if(uts46Norm2.getCombiningClass(c)==9) {  // combining class 9 is what the rule calls Virama
    1147           0 :                 continue;  // first condition met; go on to the next code unit
    1148             :             }
    1149             :             // check precontext (Joining_Type:{L,D})(Joining_Type:T)*
    1150             :             for(;;) {
    1151           0 :                 UJoiningType type=ubidi_getJoiningType(bdp, c);
    1152           0 :                 if(type==U_JT_TRANSPARENT) {
    1153           0 :                     if(j==0) {  // only transparent characters up to the label start
    1154           0 :                         return FALSE;
    1155             :                     }
    1156           0 :                     U16_PREV_UNSAFE(label, j, c);  // skip it and look one character further back
    1157           0 :                 } else if(type==U_JT_LEFT_JOINING || type==U_JT_DUAL_JOINING) {
    1158             :                     break;  // precontext fulfilled
    1159             :                 } else {
    1160           0 :                     return FALSE;  // any other joining type breaks the pattern
    1161             :                 }
    1162           0 :             }
    1163             :             // check postcontext (Joining_Type:T)*(Joining_Type:{R,D})
    1164           0 :             for(j=i+1;;) {  // j now walks forwards from just after the ZWNJ
    1165           0 :                 if(j==labelLength) {  // label ended before an R or D character was found
    1166           0 :                     return FALSE;
    1167             :                 }
    1168           0 :                 U16_NEXT_UNSAFE(label, j, c);
    1169           0 :                 UJoiningType type=ubidi_getJoiningType(bdp, c);
    1170           0 :                 if(type==U_JT_TRANSPARENT) {
    1171             :                     // just skip this character
    1172           0 :                 } else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) {
    1173             :                     break;  // postcontext fulfilled
    1174             :                 } else {
    1175           0 :                     return FALSE;  // any other joining type breaks the pattern
    1176             :                 }
    1177           0 :             }
    1178           0 :         } else if(label[i]==0x200d) {
    1179             :             // Appendix A.2. ZERO WIDTH JOINER (U+200D)
    1180             :             // Rule Set:
    1181             :             //  False;
    1182             :             //  If Canonical_Combining_Class(Before(cp)) .eq.  Virama Then True;
    1183           0 :             if(i==0) {  // nothing precedes the ZWJ
    1184           0 :                 return FALSE;
    1185             :             }
    1186             :             UChar32 c;
    1187           0 :             int32_t j=i;
    1188           0 :             U16_PREV_UNSAFE(label, j, c);  // c = Before(cp)
    1189           0 :             if(uts46Norm2.getCombiningClass(c)!=9) {  // the only accepting condition for ZWJ
    1190           0 :                 return FALSE;
    1191             :             }
    1192             :         }
    1193             :     }
    1194           0 :     return TRUE;  // every ZWNJ/ZWJ found (if any) satisfied its rule
    1195             : }
    1196             : 
    1197             : void  // Checks the CONTEXTO rules quoted below; violations are OR-ed into info.labelErrors, nothing is returned
    1198           0 : UTS46::checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const {
    1199           0 :     int32_t labelEnd=labelLength-1;  // inclusive
    1200           0 :     int32_t arabicDigits=0;  // -1 for 066x, +1 for 06Fx
    1201           0 :     for(int32_t i=0; i<=labelEnd; ++i) {
    1202           0 :         UChar32 c=label[i];  // one UTF-16 code unit; every value tested below fits in a single unit
    1203           0 :         if(c<0xb7) {  // below the lowest code point that has a rule here
    1204             :             // ASCII fastpath
    1205           0 :         } else if(c<=0x6f9) {
    1206           0 :             if(c==0xb7) {
    1207             :                 // Appendix A.3. MIDDLE DOT (U+00B7)
    1208             :                 // Rule Set:
    1209             :                 //  False;
    1210             :                 //  If Before(cp) .eq.  U+006C And
    1211             :                 //     After(cp) .eq.  U+006C Then True;
    1212           0 :                 if(!(0<i && label[i-1]==0x6c &&  // the bounds tests keep label[i-1] and label[i+1] inside the label
    1213           0 :                      i<labelEnd && label[i+1]==0x6c)) {
    1214           0 :                     info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION;
    1215             :                 }
    1216           0 :             } else if(c==0x375) {
    1217             :                 // Appendix A.4. GREEK LOWER NUMERAL SIGN (KERAIA) (U+0375)
    1218             :                 // Rule Set:
    1219             :                 //  False;
    1220             :                 //  If Script(After(cp)) .eq.  Greek Then True;
    1221           0 :                 UScriptCode script=USCRIPT_INVALID_CODE;  // stays invalid when no character follows
    1222           0 :                 if(i<labelEnd) {
    1223           0 :                     UErrorCode errorCode=U_ZERO_ERROR;  // not examined after the call
    1224           0 :                     int32_t j=i+1;
    1225           0 :                     U16_NEXT(label, j, labelLength, c);  // overwrites c; harmless because c is reloaded at the top of the loop
    1226           0 :                     script=uscript_getScript(c, &errorCode);
    1227             :                 }
    1228           0 :                 if(script!=USCRIPT_GREEK) {
    1229           0 :                     info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION;
    1230             :                 }
    1231           0 :             } else if(c==0x5f3 || c==0x5f4) {
    1232             :                 // Appendix A.5. HEBREW PUNCTUATION GERESH (U+05F3)
    1233             :                 // Rule Set:
    1234             :                 //  False;
    1235             :                 //  If Script(Before(cp)) .eq.  Hebrew Then True;
    1236             :                 //
    1237             :                 // Appendix A.6. HEBREW PUNCTUATION GERSHAYIM (U+05F4)
    1238             :                 // Rule Set:
    1239             :                 //  False;
    1240             :                 //  If Script(Before(cp)) .eq.  Hebrew Then True;
    1241           0 :                 UScriptCode script=USCRIPT_INVALID_CODE;  // stays invalid when no character precedes
    1242           0 :                 if(0<i) {
    1243           0 :                     UErrorCode errorCode=U_ZERO_ERROR;  // not examined after the call
    1244           0 :                     int32_t j=i;
    1245           0 :                     U16_PREV(label, 0, j, c);  // overwrites c; harmless because c is reloaded at the top of the loop
    1246           0 :                     script=uscript_getScript(c, &errorCode);
    1247             :                 }
    1248           0 :                 if(script!=USCRIPT_HEBREW) {
    1249           0 :                     info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION;
    1250           0 :                 }
    1251           0 :             } else if(0x660<=c /* && c<=0x6f9 */) {  // upper bound already guaranteed by the enclosing c<=0x6f9 branch
    1252             :                 // Appendix A.8. ARABIC-INDIC DIGITS (0660..0669)
    1253             :                 // Rule Set:
    1254             :                 //  True;
    1255             :                 //  For All Characters:
    1256             :                 //    If cp .in. 06F0..06F9 Then False;
    1257             :                 //  End For;
    1258             :                 //
    1259             :                 // Appendix A.9. EXTENDED ARABIC-INDIC DIGITS (06F0..06F9)
    1260             :                 // Rule Set:
    1261             :                 //  True;
    1262             :                 //  For All Characters:
    1263             :                 //    If cp .in. 0660..0669 Then False;
    1264             :                 //  End For;
    1265           0 :                 if(c<=0x669) {  // 0660..0669
    1266           0 :                     if(arabicDigits>0) {  // a 06Fx digit was seen earlier in this label
    1267           0 :                         info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS;
    1268             :                     }
    1269           0 :                     arabicDigits=-1;
    1270           0 :                 } else if(0x6f0<=c) {  // 06F0..06F9; values 066A..06EF leave the state untouched
    1271           0 :                     if(arabicDigits<0) {  // a 066x digit was seen earlier in this label
    1272           0 :                         info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS;
    1273             :                     }
    1274           0 :                     arabicDigits=1;
    1275             :                 }
    1276             :             }
    1277           0 :         } else if(c==0x30fb) {
    1278             :             // Appendix A.7. KATAKANA MIDDLE DOT (U+30FB)
    1279             :             // Rule Set:
    1280             :             //  False;
    1281             :             //  For All Characters:
    1282             :             //    If Script(cp) .in. {Hiragana, Katakana, Han} Then True;
    1283             :             //  End For;
    1284           0 :             UErrorCode errorCode=U_ZERO_ERROR;  // not examined after the calls
    1285           0 :             for(int j=0;;) {  // scan the whole label from its start, not just the neighbours
    1286           0 :                 if(j>labelEnd) {  // reached the end without finding a qualifying script
    1287           0 :                     info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION;
    1288           0 :                     break;
    1289             :                 }
    1290           0 :                 U16_NEXT(label, j, labelLength, c);  // overwrites c; harmless because c is reloaded at the top of the outer loop
    1291           0 :                 UScriptCode script=uscript_getScript(c, &errorCode);
    1292           0 :                 if(script==USCRIPT_HIRAGANA || script==USCRIPT_KATAKANA || script==USCRIPT_HAN) {
    1293             :                     break;  // one qualifying character is enough
    1294             :                 }
    1295           0 :             }
    1296             :         }
    1297             :     }
    1298           0 : }
    1299             : 
    1300             : U_NAMESPACE_END
    1301             : 
    1302             : // C API ------------------------------------------------------------------- ***
    1303             : 
    1304             : U_NAMESPACE_USE
    1305             : 
    1306             : U_CAPI UIDNA * U_EXPORT2  // C entry point: returns the C++ IDNA instance as an opaque UIDNA pointer
    1307           3 : uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode) {
    1308           3 :     return reinterpret_cast<UIDNA *>(IDNA::createUTS46Instance(options, *pErrorCode));  // pErrorCode is dereferenced without a NULL check
    1309             : }
    1310             : 
    1311             : U_CAPI void U_EXPORT2  // C entry point: destroys an instance handed out as UIDNA*
    1312           0 : uidna_close(UIDNA *idna) {
    1313           0 :     delete reinterpret_cast<IDNA *>(idna);  // reverses the cast done in uidna_openUTS46()
    1314           0 : }
    1315             : 
    1316             : static UBool  // Shared argument validation; TRUE means the caller may proceed and *pInfo has been cleared
    1317           0 : checkArgs(const void *label, int32_t length,
    1318             :           void *dest, int32_t capacity,
    1319             :           UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
    1320           0 :     if(U_FAILURE(*pErrorCode)) {  // an incoming failure is left as is
    1321           0 :         return FALSE;
    1322             :     }
    1323             :     // sizeof(UIDNAInfo)=16 in the first API version.
    1324           0 :     if(pInfo==NULL || pInfo->size<16) {
    1325           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1326           0 :         return FALSE;
    1327             :     }
    1328           0 :     if( (label==NULL ? length!=0 : length<-1) ||  // NULL input requires length 0; otherwise length must be >= -1
    1329           0 :         (dest==NULL ? capacity!=0 : capacity<0) ||  // NULL output requires capacity 0; otherwise capacity must be >= 0
    1330           0 :         (dest==label && label!=NULL)  // input and output must not be the same buffer
    1331             :     ) {
    1332           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1333           0 :         return FALSE;
    1334             :     }
    1335             :     // Set all *pInfo bytes to 0 except for the size field itself.
    1336           0 :     uprv_memset(&pInfo->size+1, 0, pInfo->size-sizeof(pInfo->size));  // starts right after the size field; byte count is taken from the caller-supplied size
    1337           0 :     return TRUE;
    1338             : }
    1339             : 
    1340             : static void  // Copies the two result fields from the C++ IDNAInfo into the C UIDNAInfo struct
    1341           0 : idnaInfoToStruct(IDNAInfo &info, UIDNAInfo *pInfo) {
    1342           0 :     pInfo->isTransitionalDifferent=info.isTransitionalDifferent();
    1343           0 :     pInfo->errors=info.getErrors();
    1344           0 : }
    1345             : 
    1346             : U_CAPI int32_t U_EXPORT2  // C wrapper around IDNA::labelToASCII() for UTF-16 buffers
    1347           0 : uidna_labelToASCII(const UIDNA *idna,
    1348             :                    const UChar *label, int32_t length,
    1349             :                    UChar *dest, int32_t capacity,
    1350             :                    UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
    1351           0 :     if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) {
    1352           0 :         return 0;  // arguments rejected or *pErrorCode already a failure
    1353             :     }
    1354           0 :     UnicodeString src((UBool)(length<0), label, length);  // first argument is TRUE exactly when length is negative
    1355           0 :     UnicodeString destString(dest, 0, capacity);  // built over dest with initial length 0; presumably aliases the caller's buffer - confirm in UnicodeString docs
    1356           0 :     IDNAInfo info;
    1357           0 :     reinterpret_cast<const IDNA *>(idna)->labelToASCII(src, destString, info, *pErrorCode);  // idna is not NULL-checked
    1358           0 :     idnaInfoToStruct(info, pInfo);  // done regardless of the error code
    1359           0 :     return destString.extract(dest, capacity, *pErrorCode);
    1360             : }
    1361             : 
    1362             : U_CAPI int32_t U_EXPORT2  // C wrapper around IDNA::labelToUnicode() for UTF-16 buffers
    1363           0 : uidna_labelToUnicode(const UIDNA *idna,
    1364             :                      const UChar *label, int32_t length,
    1365             :                      UChar *dest, int32_t capacity,
    1366             :                      UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
    1367           0 :     if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) {
    1368           0 :         return 0;  // arguments rejected or *pErrorCode already a failure
    1369             :     }
    1370           0 :     UnicodeString src((UBool)(length<0), label, length);  // first argument is TRUE exactly when length is negative
    1371           0 :     UnicodeString destString(dest, 0, capacity);  // built over dest with initial length 0; presumably aliases the caller's buffer - confirm in UnicodeString docs
    1372           0 :     IDNAInfo info;
    1373           0 :     reinterpret_cast<const IDNA *>(idna)->labelToUnicode(src, destString, info, *pErrorCode);  // idna is not NULL-checked
    1374           0 :     idnaInfoToStruct(info, pInfo);  // done regardless of the error code
    1375           0 :     return destString.extract(dest, capacity, *pErrorCode);
    1376             : }
    1377             : 
    1378             : U_CAPI int32_t U_EXPORT2  // C wrapper around IDNA::nameToASCII() for UTF-16 buffers
    1379           0 : uidna_nameToASCII(const UIDNA *idna,
    1380             :                   const UChar *name, int32_t length,
    1381             :                   UChar *dest, int32_t capacity,
    1382             :                   UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
    1383           0 :     if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) {
    1384           0 :         return 0;  // arguments rejected or *pErrorCode already a failure
    1385             :     }
    1386           0 :     UnicodeString src((UBool)(length<0), name, length);  // first argument is TRUE exactly when length is negative
    1387           0 :     UnicodeString destString(dest, 0, capacity);  // built over dest with initial length 0; presumably aliases the caller's buffer - confirm in UnicodeString docs
    1388           0 :     IDNAInfo info;
    1389           0 :     reinterpret_cast<const IDNA *>(idna)->nameToASCII(src, destString, info, *pErrorCode);  // idna is not NULL-checked
    1390           0 :     idnaInfoToStruct(info, pInfo);  // done regardless of the error code
    1391           0 :     return destString.extract(dest, capacity, *pErrorCode);
    1392             : }
    1393             : 
    1394             : U_CAPI int32_t U_EXPORT2  // C wrapper around IDNA::nameToUnicode() for UTF-16 buffers
    1395           0 : uidna_nameToUnicode(const UIDNA *idna,
    1396             :                     const UChar *name, int32_t length,
    1397             :                     UChar *dest, int32_t capacity,
    1398             :                     UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
    1399           0 :     if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) {
    1400           0 :         return 0;  // arguments rejected or *pErrorCode already a failure
    1401             :     }
    1402           0 :     UnicodeString src((UBool)(length<0), name, length);  // first argument is TRUE exactly when length is negative
    1403           0 :     UnicodeString destString(dest, 0, capacity);  // built over dest with initial length 0; presumably aliases the caller's buffer - confirm in UnicodeString docs
    1404           0 :     IDNAInfo info;
    1405           0 :     reinterpret_cast<const IDNA *>(idna)->nameToUnicode(src, destString, info, *pErrorCode);  // idna is not NULL-checked
    1406           0 :     idnaInfoToStruct(info, pInfo);  // done regardless of the error code
    1407           0 :     return destString.extract(dest, capacity, *pErrorCode);
    1408             : }
    1409             : 
    1410             : U_CAPI int32_t U_EXPORT2  // C wrapper around IDNA::labelToASCII_UTF8() for char buffers
    1411           0 : uidna_labelToASCII_UTF8(const UIDNA *idna,
    1412             :                         const char *label, int32_t length,
    1413             :                         char *dest, int32_t capacity,
    1414             :                         UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
    1415           0 :     if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) {
    1416           0 :         return 0;  // arguments rejected or *pErrorCode already a failure
    1417             :     }
    1418           0 :     StringPiece src(label, length<0 ? uprv_strlen(label) : length);  // negative length: measure the input with uprv_strlen()
    1419           0 :     CheckedArrayByteSink sink(dest, capacity);  // output goes through a sink constructed over dest/capacity
    1420           0 :     IDNAInfo info;
    1421           0 :     reinterpret_cast<const IDNA *>(idna)->labelToASCII_UTF8(src, sink, info, *pErrorCode);  // idna is not NULL-checked
    1422           0 :     idnaInfoToStruct(info, pInfo);  // done regardless of the error code
    1423           0 :     return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);  // result comes from u_terminateChars, given the sink's appended byte count
    1424             : }
    1425             : 
    1426             : U_CAPI int32_t U_EXPORT2  // C wrapper around IDNA::labelToUnicodeUTF8() for char buffers
    1427           0 : uidna_labelToUnicodeUTF8(const UIDNA *idna,
    1428             :                          const char *label, int32_t length,
    1429             :                          char *dest, int32_t capacity,
    1430             :                          UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
    1431           0 :     if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) {
    1432           0 :         return 0;  // arguments rejected or *pErrorCode already a failure
    1433             :     }
    1434           0 :     StringPiece src(label, length<0 ? uprv_strlen(label) : length);  // negative length: measure the input with uprv_strlen()
    1435           0 :     CheckedArrayByteSink sink(dest, capacity);  // output goes through a sink constructed over dest/capacity
    1436           0 :     IDNAInfo info;
    1437           0 :     reinterpret_cast<const IDNA *>(idna)->labelToUnicodeUTF8(src, sink, info, *pErrorCode);  // idna is not NULL-checked
    1438           0 :     idnaInfoToStruct(info, pInfo);  // done regardless of the error code
    1439           0 :     return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);  // result comes from u_terminateChars, given the sink's appended byte count
    1440             : }
    1441             : 
    1442             : U_CAPI int32_t U_EXPORT2  // C wrapper around IDNA::nameToASCII_UTF8() for char buffers
    1443           0 : uidna_nameToASCII_UTF8(const UIDNA *idna,
    1444             :                        const char *name, int32_t length,
    1445             :                        char *dest, int32_t capacity,
    1446             :                        UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
    1447           0 :     if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) {
    1448           0 :         return 0;  // arguments rejected or *pErrorCode already a failure
    1449             :     }
    1450           0 :     StringPiece src(name, length<0 ? uprv_strlen(name) : length);  // negative length: measure the input with uprv_strlen()
    1451           0 :     CheckedArrayByteSink sink(dest, capacity);  // output goes through a sink constructed over dest/capacity
    1452           0 :     IDNAInfo info;
    1453           0 :     reinterpret_cast<const IDNA *>(idna)->nameToASCII_UTF8(src, sink, info, *pErrorCode);  // idna is not NULL-checked
    1454           0 :     idnaInfoToStruct(info, pInfo);  // done regardless of the error code
    1455           0 :     return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);  // result comes from u_terminateChars, given the sink's appended byte count
    1456             : }
    1457             : 
    1458             : U_CAPI int32_t U_EXPORT2  // C wrapper around IDNA::nameToUnicodeUTF8() for char buffers
    1459           0 : uidna_nameToUnicodeUTF8(const UIDNA *idna,
    1460             :                         const char *name, int32_t length,
    1461             :                         char *dest, int32_t capacity,
    1462             :                         UIDNAInfo *pInfo, UErrorCode *pErrorCode) {
    1463           0 :     if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) {
    1464           0 :         return 0;  // arguments rejected or *pErrorCode already a failure
    1465             :     }
    1466           0 :     StringPiece src(name, length<0 ? uprv_strlen(name) : length);  // negative length: measure the input with uprv_strlen()
    1467           0 :     CheckedArrayByteSink sink(dest, capacity);  // output goes through a sink constructed over dest/capacity
    1468           0 :     IDNAInfo info;
    1469           0 :     reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode);  // idna is not NULL-checked
    1470           0 :     idnaInfoToStruct(info, pInfo);  // done regardless of the error code
    1471           0 :     return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);  // result comes from u_terminateChars, given the sink's appended byte count
    1472             : }
    1473             : 
    1474             : #endif  // UCONFIG_NO_IDNA

Generated by: LCOV version 1.13