LCOV - code coverage report
Current view: top level - intl/icu/source/common - uinvchar.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 7 198 3.5 %
Date: 2017-07-14 16:53:18 Functions: 1 14 7.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : *
       6             : *   Copyright (C) 1999-2010, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : *******************************************************************************
      10             : *   file name:  uinvchar.cpp
      11             : *   encoding:   UTF-8
      12             : *   tab size:   8 (not used)
      13             : *   indentation:2
      14             : *
      15             : *   created on: 2004sep14
      16             : *   created by: Markus W. Scherer
      17             : *
      18             : *   Functions for handling invariant characters, moved here from putil.c
      19             : *   for better modularization.
      20             : */
      21             : 
      22             : #include "unicode/utypes.h"
      23             : #include "unicode/ustring.h"
      24             : #include "udataswp.h"
      25             : #include "cstring.h"
      26             : #include "cmemory.h"
      27             : #include "uassert.h"
      28             : #include "uinvchar.h"
      29             : 
      30             : /* invariant-character handling --------------------------------------------- */
      31             : 
      32             : /*
      33             :  * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
      34             :  * appropriately for most EBCDIC codepages.
      35             :  *
      36             :  * They currently also map most other ASCII graphic characters,
      37             :  * appropriately for codepages 37 and 1047.
      38             :  * Exceptions: The characters for []^ have different codes in 37 & 1047.
      39             :  * Both versions are mapped to ASCII.
      40             :  *
      41             :  *    ASCII 37 1047
      42             :  * [     5B BA   AD
      43             :  * ]     5D BB   BD
      44             :  * ^     5E B0   5F
      45             :  *
      46             :  * There are no mappings for variant characters from Unicode to EBCDIC.
      47             :  *
      48             :  * Currently, C0 control codes are also included in these maps.
      49             :  * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
      50             :  * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
      51             :  * but there is no mapping for ASCII LF back to EBCDIC.
      52             :  *
      53             :  *    ASCII EBCDIC S/390-OE
      54             :  * LF    0A     25       15
      55             :  * NEL   85     15       25
      56             :  *
      57             :  * The maps below explicitly exclude the variant
      58             :  * control and graphical characters that are in ASCII-based
      59             :  * codepages at 0x80 and above.
      60             :  * "No mapping" is expressed by mapping to a 00 byte.
      61             :  *
      62             :  * These tables do not establish a converter or a codepage.
      63             :  */
      64             : 
      65             : static const uint8_t asciiFromEbcdic[256]={ /* index: EBCDIC byte; value: ASCII code, 0x00 means "no mapping" */
      66             :     0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* EBCDIC 00..0f */
      67             :     0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, /* 10..1f; 15 maps to LF (0a) */
      68             :     0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, /* 20..2f; 25 maps to LF (0a) as well */
      69             :     0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, /* 30..3f */
      70             : 
      71             :     0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, /* 40..4f */
      72             :     0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, /* 50..5f; 5f maps to '^' (the 1047 code) */
      73             :     0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, /* 60..6f */
      74             :     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, /* 70..7f */
      75             : 
      76             :     0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 80..8f: a..i */
      77             :     0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 90..9f: j..r */
      78             :     0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, /* a0..af: s..z; ad maps to '[' (the 1047 code) */
      79             :     0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, /* b0..bf: b0 '^', ba '[', bb ']' (37 codes); bd ']' (1047 code) */
      80             : 
      81             :     0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* c0..cf: A..I */
      82             :     0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* d0..df: J..R */
      83             :     0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* e0..ef: S..Z */
      84             :     0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /* f0..ff: digits 0..9 */
      85             : };
      86             : 
      87             : static const uint8_t ebcdicFromAscii[256]={ /* index: ASCII code; value: EBCDIC byte, 0x00 means "no mapping" */
      88             :     0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* ASCII 00..0f; LF (0a) has no mapping back */
      89             :     0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, /* 10..1f */
      90             :     0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, /* 20..2f; 21 23 24 unmapped */
      91             :     0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f, /* 30..3f */
      92             : 
      93             :     0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, /* 40..4f; 40 unmapped */
      94             :     0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d, /* 50..5f; 5b..5e unmapped */
      95             :     0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, /* 60..6f; 60 unmapped */
      96             :     0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07, /* 70..7f; 7b..7e unmapped */
      97             : 
      98             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80..ff: nothing above ASCII is mapped */
      99             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     100             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     101             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     102             : 
     103             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     104             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     105             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     106             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
     107             : };
     108             : 
     109             : /* Same as asciiFromEbcdic[] except that EBCDIC uppercase letters (c1..c9, d1..d9, e2..e9) yield lowercase ASCII. NOTE(review): entry e0 is 0x7c here but 0x5c in asciiFromEbcdic[]; confirm whether that difference is intended. */
     110             : static const uint8_t lowercaseAsciiFromEbcdic[256]={
     111             :     0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
     112             :     0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
     113             :     0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
     114             :     0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
     115             : 
     116             :     0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
     117             :     0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
     118             :     0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
     119             :     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
     120             : 
     121             :     0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     122             :     0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     123             :     0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
     124             :     0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
     125             : 
     126             :     0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* c0..cf: A..I folded to a..i */
     127             :     0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* d0..df: J..R folded to j..r */
     128             :     0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* e0..ef: S..Z folded to s..z; e0 differs from asciiFromEbcdic[] (see note at top) */
     129             :     0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
     130             : };
     131             : 
     132             : /*
     133             :  * Bit sets indicating which characters of the ASCII repertoire
     134             :  * (by ASCII/Unicode code) are "invariant".
     135             :  * See utypes.h for more details.
     136             :  *
     137             :  * All characters of the ASCII repertoire are considered invariant except
     138             :  * for 0a (LF, cleared in the first word of the bit set below) and the following:
     139             :  * 21  '!' <exclamation mark>
     140             :  * 23  '#' <number sign>
     141             :  * 24  '$' <dollar sign>
     142             :  *
     143             :  * 40  '@' <commercial at>
     144             :  *
     145             :  * 5b  '[' <left bracket>
     146             :  * 5c  '\' <backslash>
     147             :  * 5d  ']' <right bracket>
     148             :  * 5e  '^' <circumflex>
     149             :  *
     150             :  * 60  '`' <grave accent>
     151             :  *
     152             :  * 7b  '{' <left brace>
     153             :  * 7c  '|' <vertical line>
     154             :  * 7d  '}' <right brace>
     155             :  * 7e  '~' <tilde>
     156             :  */
     157             : static const uint32_t invariantChars[4]={ /* one word per 32 codes: bit (c&0x1f) of word (c>>5) is set when code c is invariant */
     158             :     0xfffffbff, /* 00..1f but not 0a */
     159             :     0xffffffe5, /* 20..3f but not 21 23 24 */
     160             :     0x87fffffe, /* 40..5f but not 40 5b..5e */
     161             :     0x87fffffe  /* 60..7f but not 60 7b..7e */
     162             : };
     163             : 
     164             : /*
     165             :  * Tests an unsigned value (or one known to be non-negative) holding an ASCII-family code:
     166             :  * true when c<=0x7f and its bit is set in invariantChars[]. The argument is expanded several times, so it must have no side effects.
     167             :  */
     168             : #define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)
     169             : 
     170             : /* Same test for signed types: rejects negative values first, so the table index is never negative. */
     171             : #define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))
     172             : /* Platform char <-> ASCII/Unicode code conversion: identity on ASCII platforms, table lookup on EBCDIC platforms. */
     173             : #if U_CHARSET_FAMILY==U_ASCII_FAMILY
     174             : #define CHAR_TO_UCHAR(c) c
     175             : #define UCHAR_TO_CHAR(c) c
     176             : #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
     177             : #define CHAR_TO_UCHAR(u) asciiFromEbcdic[u]
     178             : #define UCHAR_TO_CHAR(u) ebcdicFromAscii[u]
     179             : #else
     180             : #   error U_CHARSET_FAMILY is not valid
     181             : #endif
     182             : 
     183             : /* Converts exactly `length` platform-charset chars from cs into UChars in us via CHAR_TO_UCHAR; no terminator is appended. */
     184             : U_CAPI void U_EXPORT2
     185           0 : u_charsToUChars(const char *cs, UChar *us, int32_t length) {
     186             :     UChar u;
     187             :     uint8_t c;
     188             : 
     189             :     /*
     190             :      * Allow the entire ASCII repertoire to be mapped _to_ Unicode.
     191             :      * For EBCDIC systems, this works for characters with codes from
     192             :      * codepages 37 and 1047 or compatible.
     193             :      */
     194           0 :     while(length>0) {
     195           0 :         c=(uint8_t)(*cs++); /* go through uint8_t so a byte >=0x80 is a non-negative table index */
     196           0 :         u=(UChar)CHAR_TO_UCHAR(c);
     197           0 :         U_ASSERT((u!=0 || c==0)); /* a zero result is only expected for a NUL input; otherwise the char had no mapping */
     198           0 :         *us++=u;
     199           0 :         --length;
     200             :     }
     201           0 : }
     202             : /* Converts exactly `length` UChars from us into platform-charset chars in cs via UCHAR_TO_CHAR; no terminator is appended. */
     203             : U_CAPI void U_EXPORT2
     204          24 : u_UCharsToChars(const UChar *us, char *cs, int32_t length) {
     205             :     UChar u;
     206             : 
     207          46 :     while(length>0) {
     208          22 :         u=*us++;
     209          22 :         if(!UCHAR_IS_INVARIANT(u)) {
     210           0 :             U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */
     211             :             u=0; /* past the assertion, a variant character is written out as 0 */
     212             :         }
     213          22 :         *cs++=(char)UCHAR_TO_CHAR(u);
     214          22 :         --length;
     215             :     }
     216           2 : }
     217             : /* Returns TRUE when every char of s is invariant. length<0: s is scanned up to its NUL; otherwise exactly `length` chars are checked and embedded NULs are accepted. */
     218             : U_CAPI UBool U_EXPORT2
     219           0 : uprv_isInvariantString(const char *s, int32_t length) {
     220             :     uint8_t c;
     221             : 
     222             :     for(;;) {
     223           0 :         if(length<0) {
     224             :             /* NUL-terminated */
     225           0 :             c=(uint8_t)*s++;
     226           0 :             if(c==0) {
     227           0 :                 break;
     228             :             }
     229             :         } else {
     230             :             /* count length */
     231           0 :             if(length==0) {
     232           0 :                 break;
     233             :             }
     234           0 :             --length;
     235           0 :             c=(uint8_t)*s++;
     236           0 :             if(c==0) {
     237           0 :                 continue; /* NUL is invariant */
     238             :             }
     239             :         }
     240             :         /* c!=0 here, so in the EBCDIC branch below a table result of 0 can only mean "no mapping" */
     241             : 
     242             :         /*
     243             :          * no assertions here because these functions are legitimately called
     244             :          * for strings with variant characters
     245             :          */
     246             : #if U_CHARSET_FAMILY==U_ASCII_FAMILY
     247           0 :         if(!UCHAR_IS_INVARIANT(c)) {
     248           0 :             return FALSE; /* found a variant char */
     249             :         }
     250             : #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
     251             :         c=CHAR_TO_UCHAR(c);
     252             :         if(c==0 || !UCHAR_IS_INVARIANT(c)) {
     253             :             return FALSE; /* found a variant char */
     254             :         }
     255             : #else
     256             : #   error U_CHARSET_FAMILY is not valid
     257             : #endif
     258             :     }
     259           0 :     return TRUE;
     260             : }
     261             : /* Returns TRUE when every UChar of s is invariant. length<0: s is scanned up to its NUL; otherwise exactly `length` UChars are checked. */
     262             : U_CAPI UBool U_EXPORT2
     263           0 : uprv_isInvariantUString(const UChar *s, int32_t length) {
     264             :     UChar c;
     265             : 
     266             :     for(;;) {
     267           0 :         if(length<0) {
     268             :             /* NUL-terminated */
     269           0 :             c=*s++;
     270           0 :             if(c==0) {
     271           0 :                 break;
     272             :             }
     273             :         } else {
     274             :             /* count length */
     275           0 :             if(length==0) {
     276           0 :                 break;
     277             :             }
     278           0 :             --length;
     279           0 :             c=*s++; /* an embedded NUL needs no special case: code 0 has its bit set in invariantChars[] */
     280             :         }
     281             : 
     282             :         /*
     283             :          * no assertions here because these functions are legitimately called
     284             :          * for strings with variant characters
     285             :          */
     286           0 :         if(!UCHAR_IS_INVARIANT(c)) {
     287           0 :             return FALSE; /* found a variant char */
     288             :         }
     289             :     }
     290           0 :     return TRUE;
     291             : }
     292             : 
     293             : /* UDataSwapFn implementations used in udataswp.c ------- */
     294             : 
     295             : /* Converts `length` ASCII bytes from inData to EBCDIC in outData. Returns length on success; on a variant byte it reports via udata_printError, sets U_INVALID_CHAR_FOUND and returns 0 (bytes before it were already written). */
     296             : U_CAPI int32_t U_EXPORT2
     297           0 : uprv_ebcdicFromAscii(const UDataSwapper *ds,
     298             :                      const void *inData, int32_t length, void *outData,
     299             :                      UErrorCode *pErrorCode) {
     300             :     const uint8_t *s;
     301             :     uint8_t *t;
     302             :     uint8_t c;
     303             : 
     304             :     int32_t count;
     305             : 
     306           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
     307           0 :         return 0; /* nothing is done when an earlier failure is already recorded */
     308             :     }
     309           0 :     if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
     310           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     311           0 :         return 0;
     312             :     }
     313             : 
     314             :     /* setup and swapping */
     315           0 :     s=(const uint8_t *)inData;
     316           0 :     t=(uint8_t *)outData;
     317           0 :     count=length;
     318           0 :     while(count>0) {
     319           0 :         c=*s++;
     320           0 :         if(!UCHAR_IS_INVARIANT(c)) {
     321           0 :             udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
     322           0 :                              length, length-count); /* length-count is the 0-based index of the offending byte */
     323           0 :             *pErrorCode=U_INVALID_CHAR_FOUND;
     324           0 :             return 0;
     325             :         }
     326           0 :         *t++=ebcdicFromAscii[c];
     327           0 :         --count;
     328             :     }
     329             : 
     330           0 :     return length;
     331             : }
     332             : 
     333             : /* Verifies that `length` ASCII bytes in inData are all invariant, then copies them unchanged to outData (skipped when both are the same buffer). Returns length on success; on a variant byte it reports via udata_printError, sets U_INVALID_CHAR_FOUND and returns 0 without copying. */
     334             : U_CFUNC int32_t
     335           0 : uprv_copyAscii(const UDataSwapper *ds,
     336             :                const void *inData, int32_t length, void *outData,
     337             :                UErrorCode *pErrorCode) {
     338             :     const uint8_t *s;
     339             :     uint8_t c;
     340             : 
     341             :     int32_t count;
     342             : 
     343           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
     344           0 :         return 0; /* nothing is done when an earlier failure is already recorded */
     345             :     }
     346           0 :     if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
     347           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     348           0 :         return 0;
     349             :     }
     350             : 
     351             :     /* setup and checking */
     352           0 :     s=(const uint8_t *)inData;
     353           0 :     count=length;
     354           0 :     while(count>0) {
     355           0 :         c=*s++;
     356           0 :         if(!UCHAR_IS_INVARIANT(c)) {
     357           0 :             udata_printError(ds, "uprv_copyAscii() string[%d] contains a variant character in position %d\n",
     358           0 :                              length, length-count); /* length-count is the 0-based index of the offending byte */
     359           0 :             *pErrorCode=U_INVALID_CHAR_FOUND;
     360           0 :             return 0;
     361             :         }
     362           0 :         --count;
     363             :     }
     364             : 
     365           0 :     if(length>0 && inData!=outData) {
     366           0 :         uprv_memcpy(outData, inData, length);
     367             :     }
     368             : 
     369           0 :     return length;
     370             : }
     371             : 
     372             : /* Converts `length` EBCDIC bytes from inData to ASCII in outData. Returns length on success; on an unmapped or variant byte it reports via udata_printError, sets U_INVALID_CHAR_FOUND and returns 0 (bytes before it were already written). */
     373             : U_CFUNC int32_t
     374           0 : uprv_asciiFromEbcdic(const UDataSwapper *ds,
     375             :                      const void *inData, int32_t length, void *outData,
     376             :                      UErrorCode *pErrorCode) {
     377             :     const uint8_t *s;
     378             :     uint8_t *t;
     379             :     uint8_t c;
     380             : 
     381             :     int32_t count;
     382             : 
     383           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
     384           0 :         return 0; /* nothing is done when an earlier failure is already recorded */
     385             :     }
     386           0 :     if(ds==NULL || inData==NULL || length<0 ||  (length>0 && outData==NULL)) {
     387           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     388           0 :         return 0;
     389             :     }
     390             : 
     391             :     /* setup and swapping */
     392           0 :     s=(const uint8_t *)inData;
     393           0 :     t=(uint8_t *)outData;
     394           0 :     count=length;
     395           0 :     while(count>0) {
     396           0 :         c=*s++;
     397           0 :         if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { /* NUL passes through; c is replaced by its ASCII code, and 0 from the table means "no mapping" */
     398           0 :             udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
     399           0 :                              length, length-count); /* length-count is the 0-based index of the offending byte */
     400           0 :             *pErrorCode=U_INVALID_CHAR_FOUND;
     401           0 :             return 0;
     402             :         }
     403           0 :         *t++=c;
     404           0 :         --count;
     405             :     }
     406             : 
     407           0 :     return length;
     408             : }
     409             : 
     410             : /* Verifies that `length` EBCDIC bytes in inData all map to invariant characters, then copies them unchanged to outData (skipped when both are the same buffer). Returns length on success; on an unmapped or variant byte it reports via udata_printError, sets U_INVALID_CHAR_FOUND and returns 0 without copying. */
     411             : U_CFUNC int32_t
     412           0 : uprv_copyEbcdic(const UDataSwapper *ds,
     413             :                 const void *inData, int32_t length, void *outData,
     414             :                 UErrorCode *pErrorCode) {
     415             :     const uint8_t *s;
     416             :     uint8_t c;
     417             : 
     418             :     int32_t count;
     419             : 
     420           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
     421           0 :         return 0; /* nothing is done when an earlier failure is already recorded */
     422             :     }
     423           0 :     if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
     424           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     425           0 :         return 0;
     426             :     }
     427             : 
     428             :     /* setup and checking */
     429           0 :     s=(const uint8_t *)inData;
     430           0 :     count=length;
     431           0 :     while(count>0) {
     432           0 :         c=*s++;
     433           0 :         if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { /* NUL passes; c is only a scratch ASCII code here, the input is copied as-is below */
     434           0 :             udata_printError(ds, "uprv_copyEbcdic() string[%d] contains a variant character in position %d\n",
     435           0 :                              length, length-count); /* two %d conversions for the two int arguments: string length and 0-based index */
     436           0 :             *pErrorCode=U_INVALID_CHAR_FOUND;
     437           0 :             return 0;
     438             :         }
     439           0 :         --count;
     440             :     }
     441             : 
     442           0 :     if(length>0 && inData!=outData) {
     443           0 :         uprv_memcpy(outData, inData, length);
     444             :     }
     445             : 
     446           0 :     return length;
     447             : }
     448             : 
     449             : /* Compares an ASCII char string with a UChar string by invariant-character code; a length of -1 means NUL-terminated. Variant chars compare less than others and unlike each other. Returns <0, 0 or >0; returns 0 for NULL strings or lengths below -1. */
     450             : U_CFUNC int32_t
     451           0 : uprv_compareInvAscii(const UDataSwapper *ds,
     452             :                      const char *outString, int32_t outLength,
     453             :                      const UChar *localString, int32_t localLength) {
     454             :     (void)ds; /* ds is not used by this function */
     455             :     int32_t minLength;
     456             :     UChar32 c1, c2;
     457             :     uint8_t c;
     458             : 
     459           0 :     if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
     460           0 :         return 0;
     461             :     }
     462             : 
     463           0 :     if(outLength<0) {
     464           0 :         outLength=(int32_t)uprv_strlen(outString);
     465             :     }
     466           0 :     if(localLength<0) {
     467           0 :         localLength=u_strlen(localString);
     468             :     }
     469             : 
     470           0 :     minLength= outLength<localLength ? outLength : localLength;
     471             : 
     472           0 :     while(minLength>0) {
     473           0 :         c=(uint8_t)*outString++;
     474           0 :         if(UCHAR_IS_INVARIANT(c)) {
     475           0 :             c1=c;
     476             :         } else {
     477           0 :             c1=-1; /* variant char on the outString side */
     478             :         }
     479             : 
     480           0 :         c2=*localString++;
     481           0 :         if(!UCHAR_IS_INVARIANT(c2)) {
     482           0 :             c2=-2; /* distinct from -1, so two variant chars never compare equal */
     483             :         }
     484             : 
     485           0 :         if((c1-=c2)!=0) {
     486           0 :             return c1; /* first difference decides the result */
     487             :         }
     488             : 
     489           0 :         --minLength;
     490             :     }
     491             : 
     492             :     /* strings start with same prefix, compare lengths */
     493           0 :     return outLength-localLength;
     494             : }
     495             : /* Like uprv_compareInvAscii() but outString holds EBCDIC bytes, which are mapped through asciiFromEbcdic[] before comparing; unmapped or variant bytes count as variant. Returns <0, 0 or >0; returns 0 for NULL strings or lengths below -1. */
     496             : U_CFUNC int32_t
     497           0 : uprv_compareInvEbcdic(const UDataSwapper *ds,
     498             :                       const char *outString, int32_t outLength,
     499             :                       const UChar *localString, int32_t localLength) {
     500             :     (void)ds; /* ds is not used by this function */
     501             :     int32_t minLength;
     502             :     UChar32 c1, c2;
     503             :     uint8_t c;
     504             : 
     505           0 :     if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
     506           0 :         return 0;
     507             :     }
     508             : 
     509           0 :     if(outLength<0) {
     510           0 :         outLength=(int32_t)uprv_strlen(outString);
     511             :     }
     512           0 :     if(localLength<0) {
     513           0 :         localLength=u_strlen(localString);
     514             :     }
     515             : 
     516           0 :     minLength= outLength<localLength ? outLength : localLength;
     517             : 
     518           0 :     while(minLength>0) {
     519           0 :         c=(uint8_t)*outString++;
     520           0 :         if(c==0) {
     521           0 :             c1=0; /* NUL handled first because a table value of 0 otherwise means "no mapping" */
     522           0 :         } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) {
     523             :             /* c1 is set */
     524             :         } else {
     525           0 :             c1=-1; /* unmapped or variant char on the outString side */
     526             :         }
     527             : 
     528           0 :         c2=*localString++;
     529           0 :         if(!UCHAR_IS_INVARIANT(c2)) {
     530           0 :             c2=-2; /* distinct from -1, so two variant chars never compare equal */
     531             :         }
     532             : 
     533           0 :         if((c1-=c2)!=0) {
     534           0 :             return c1; /* first difference decides the result */
     535             :         }
     536             : 
     537           0 :         --minLength;
     538             :     }
     539             : 
     540             :     /* strings start with same prefix, compare lengths */
     541           0 :     return outLength-localLength;
     542             : }
     543             : /* Compares two NUL-terminated EBCDIC strings by the ASCII codes of their characters. Returns 0 when both are identical, otherwise the difference at the first differing byte. */
     544             : U_CAPI int32_t U_EXPORT2
     545           0 : uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
     546             :     int32_t c1, c2;
     547             : 
     548           0 :     for(;; ++s1, ++s2) {
     549           0 :         c1=(uint8_t)*s1;
     550           0 :         c2=(uint8_t)*s2;
     551           0 :         if(c1!=c2) {
     552           0 :             if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) {
     553           0 :                 c1=-(int32_t)(uint8_t)*s1; /* unmapped or variant: use the negated original byte, so it is below every ASCII code */
     554             :             }
     555           0 :             if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) {
     556           0 :                 c2=-(int32_t)(uint8_t)*s2; /* same substitution for the second string */
     557             :             }
     558           0 :             return c1-c2;
     559           0 :         } else if(c1==0) {
     560           0 :             return 0; /* both strings ended together without a difference */
     561             :         }
     562             :     }
     563             : }
     564             : /* Returns the lowercaseAsciiFromEbcdic[] entry for EBCDIC char c; the table holds 0 where there is no mapping. */
     565             : U_CAPI char U_EXPORT2
     566           0 : uprv_ebcdicToLowercaseAscii(char c) {
     567           0 :     return (char)lowercaseAsciiFromEbcdic[(uint8_t)c]; /* cast keeps the index non-negative where char is signed */
     568             : }
     569             : /* Copies at most n bytes from EBCDIC string src to dst, mapping each through asciiFromEbcdic[], then zero-fills up to n bytes. n==-1 means the length of src plus its NUL. Returns dst. dst gets no terminator when src has n or more non-NUL bytes. */
     570             : U_INTERNAL uint8_t* U_EXPORT2
     571           0 : uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
     572             : {
     573           0 :   uint8_t *orig_dst = dst;
     574             : 
     575           0 :   if(n==-1) { 
     576           0 :     n = uprv_strlen((const char*)src)+1; /* copy NUL */
     577             :   }
     578             :   /* copy non-null */
     579           0 :   while(*src && n>0) {
     580           0 :     *(dst++) = asciiFromEbcdic[*(src++)]; /* an unmapped byte is stored as the table's 0 */
     581           0 :     n--;
     582             :   }
     583             :   /* pad */
     584           0 :   while(n>0) {
     585           0 :     *(dst++) = 0;
     586           0 :     n--;
     587             :   }
     588           0 :   return orig_dst;
     589             : }
     590             : /* Copies at most n bytes from ASCII string src to dst, mapping each through ebcdicFromAscii[], then zero-fills up to n bytes. n==-1 means the length of src plus its NUL. Returns dst. dst gets no terminator when src has n or more non-NUL bytes. */
     591             : U_INTERNAL uint8_t* U_EXPORT2
     592           0 : uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
     593             : {
     594           0 :   uint8_t *orig_dst = dst;
     595             : 
     596           0 :   if(n==-1) { 
     597           0 :     n = uprv_strlen((const char*)src)+1; /* copy NUL */
     598             :   }
     599             :   /* copy non-null */
     600           0 :   while(*src && n>0) {
     601           0 :     char ch = ebcdicFromAscii[*(src++)];
     602           0 :     if(ch == 0) { /* the table has no mapping for this byte */
     603           0 :       ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */
     604             :     }
     605           0 :     *(dst++) = ch;
     606           0 :     n--;
     607             :   }
     608             :   /* pad */
     609           0 :   while(n>0) {
     610           0 :     *(dst++) = 0;
     611           0 :     n--;
     612             :   }
     613           0 :   return orig_dst;
     614             : }
     615             : 

Generated by: LCOV version 1.13