LCOV - output.info - xpcom/string/nsCharTraits.h

LCOV - code coverage report

Current view:	top level - xpcom/string - nsCharTraits.h (source / functions)		Hit	Total	Coverage
Test:	output.info	Lines:	142	147	96.6 %
Date:	2017-07-14 16:53:18	Functions:	43	47	91.5 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
       3             : /* This Source Code Form is subject to the terms of the Mozilla Public
       4             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       5             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       6             : 
       7             : #ifndef nsCharTraits_h___
       8             : #define nsCharTraits_h___
       9             : 
      10             : #include <ctype.h> // for |EOF|, |WEOF|
      11             : #include <string.h> // for |memcpy|, et al
      12             : 
      13             : #include "nscore.h" // for |char16_t|
      14             : 
      15             : // This file may be used (through nsUTF8Utils.h) from non-XPCOM code, in
      16             : // particular the standalone software updater. In that case stub out
      17             : // the macros provided by nsDebug.h which are only usable when linking XPCOM
      18             : 
      19             : #ifdef NS_NO_XPCOM
      20             : #define NS_WARNING(msg)
      21             : #define NS_ASSERTION(cond, msg)
      22             : #define NS_ERROR(msg)
      23             : #else
      24             : #include "nsDebug.h"  // for NS_ASSERTION
      25             : #endif
      26             : 
      27             : /*
      28             :  * Some macros for converting char16_t (UTF-16) to and from Unicode scalar
      29             :  * values.
      30             :  *
      31             :  * Note that UTF-16 represents all Unicode scalar values up to U+10FFFF by
      32             :  * using "surrogate pairs". These consist of a high surrogate, i.e. a code
      33             :  * point in the range U+D800 - U+DBFF, and a low surrogate, i.e. a code point
      34             :  * in the range U+DC00 - U+DFFF, like this:
      35             :  *
      36             :  *  U+D800 U+DC00 =  U+10000
      37             :  *  U+D800 U+DC01 =  U+10001
      38             :  *  ...
      39             :  *  U+DBFF U+DFFE = U+10FFFE
      40             :  *  U+DBFF U+DFFF = U+10FFFF
      41             :  *
      42             :  * These surrogate code points U+D800 - U+DFFF are not themselves valid Unicode
      43             :  * scalar values and are not well-formed UTF-16 except as high-surrogate /
      44             :  * low-surrogate pairs.
      45             :  */
      46             : 
      47             : #define PLANE1_BASE          uint32_t(0x00010000)
      48             : // High surrogates are in the range 0xD800 -- 0xDBFF
      49             : #define NS_IS_HIGH_SURROGATE(u) ((uint32_t(u) & 0xFFFFFC00) == 0xD800)
      50             : // Low surrogates are in the range 0xDC00 -- 0xDFFF
      51             : #define NS_IS_LOW_SURROGATE(u)  ((uint32_t(u) & 0xFFFFFC00) == 0xDC00)
      52             : // Faster than testing NS_IS_HIGH_SURROGATE || NS_IS_LOW_SURROGATE
      53             : #define IS_SURROGATE(u)      ((uint32_t(u) & 0xFFFFF800) == 0xD800)
      54             : 
      55             : // Everything else is not a surrogate: 0x0000 -- 0xD7FF, 0xE000 -- 0xFFFF
      56             : 
      57             : // N = (H - 0xD800) * 0x400 + 0x10000 + (L - 0xDC00)
      58             : // I wonder whether we could somehow assert that H is a high surrogate
      59             : // and L is a low surrogate
      60             : #define SURROGATE_TO_UCS4(h, l) (((uint32_t(h) & 0x03FF) << 10) + \
      61             :                                  (uint32_t(l) & 0x03FF) + PLANE1_BASE)
      62             : 
      63             : // Extract surrogates from a UCS4 char
      64             : // Reference: the Unicode standard 4.0, section 3.9
      65             : // Since (c - 0x10000) >> 10 == (c >> 10) - 0x0080 and
      66             : // 0xD7C0 == 0xD800 - 0x0080,
      67             : // ((c - 0x10000) >> 10) + 0xD800 can be simplified to
      68             : #define H_SURROGATE(c) char16_t(char16_t(uint32_t(c) >> 10) + \
      69             :                                 char16_t(0xD7C0))
      70             : // where it's to be noted that 0xD7C0 is not bitwise-OR'd
      71             : // but added.
      72             : 
      73             : // Since 0x10000 & 0x03FF == 0,
      74             : // (c - 0x10000) & 0x03FF == c & 0x03FF so that
      75             : // ((c - 0x10000) & 0x03FF) | 0xDC00 is equivalent to
      76             : #define L_SURROGATE(c) char16_t(char16_t(uint32_t(c) & uint32_t(0x03FF)) | \
      77             :                                  char16_t(0xDC00))
      78             : 
      79             : #define IS_IN_BMP(ucs) (uint32_t(ucs) < PLANE1_BASE)
      80             : #define UCS2_REPLACEMENT_CHAR char16_t(0xFFFD)
      81             : 
      82             : #define UCS_END uint32_t(0x00110000)
      83             : #define IS_VALID_CHAR(c) ((uint32_t(c) < UCS_END) && !IS_SURROGATE(c))
      84             : #define ENSURE_VALID_CHAR(c) (IS_VALID_CHAR(c) ? (c) : UCS2_REPLACEMENT_CHAR)
      85             : 
      86             : template <class CharT>
      87             : struct nsCharTraits
      88             : {
      89             : };
      90             : 
      91             : template <>
      92             : struct nsCharTraits<char16_t>
      93             : {
      94             :   typedef char16_t char_type;
      95             :   typedef uint16_t  unsigned_char_type;
      96             :   typedef char      incompatible_char_type;
      97             : 
      98             :   static char_type* const sEmptyBuffer;
      99             : 
     100             :   // integer representation of characters:
     101             :   typedef int int_type;
     102             : 
     103             :   static char_type
     104             :   to_char_type(int_type aChar)
     105             :   {
     106             :     return char_type(aChar);
     107             :   }
     108             : 
     109             :   static int_type
     110      195934 :   to_int_type(char_type aChar)
     111             :   {
     112      195934 :     return int_type(static_cast<unsigned_char_type>(aChar));
     113             :   }
     114             : 
     115             :   static bool
     116        6835 :   eq_int_type(int_type aLhs, int_type aRhs)
     117             :   {
     118        6835 :     return aLhs == aRhs;
     119             :   }
     120             : 
     121             : 
     122             :   // |char_type| comparisons:
     123             : 
     124             :   static bool
     125      427978 :   eq(char_type aLhs, char_type aRhs)
     126             :   {
     127      427978 :     return aLhs == aRhs;
     128             :   }
     129             : 
     130             :   static bool
     131             :   lt(char_type aLhs, char_type aRhs)
     132             :   {
     133             :     return aLhs < aRhs;
     134             :   }
     135             : 
     136             : 
     137             :   // operations on s[n] arrays:
     138             : 
     139             :   static char_type*
     140        7012 :   move(char_type* aStr1, const char_type* aStr2, size_t aN)
     141             :   {
     142        7012 :     return static_cast<char_type*>(memmove(aStr1, aStr2,
     143        7012 :                                            aN * sizeof(char_type)));
     144             :   }
     145             : 
     146             :   static char_type*
     147       81370 :   copy(char_type* aStr1, const char_type* aStr2, size_t aN)
     148             :   {
     149       81370 :     return static_cast<char_type*>(memcpy(aStr1, aStr2,
     150       81370 :                                           aN * sizeof(char_type)));
     151             :   }
     152             : 
     153             :   static char_type*
     154        1853 :   copyASCII(char_type* aStr1, const char* aStr2, size_t aN)
     155             :   {
     156       17578 :     for (char_type* s = aStr1; aN--; ++s, ++aStr2) {
     157       15725 :       NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character");
     158       15725 :       *s = static_cast<char_type>(*aStr2);
     159             :     }
     160        1853 :     return aStr1;
     161             :   }
     162             : 
     163             :   static int
     164      333803 :   compare(const char_type* aStr1, const char_type* aStr2, size_t aN)
     165             :   {
     166      634166 :     for (; aN--; ++aStr1, ++aStr2) {
     167      315690 :       if (!eq(*aStr1, *aStr2)) {
     168       15327 :         return to_int_type(*aStr1) - to_int_type(*aStr2);
     169             :       }
     170             :     }
     171             : 
     172       18113 :     return 0;
     173             :   }
     174             : 
     175             :   static int
     176        8443 :   compareASCII(const char_type* aStr1, const char* aStr2, size_t aN)
     177             :   {
     178       14684 :     for (; aN--; ++aStr1, ++aStr2) {
     179        6423 :       NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character");
     180        6423 :       if (!eq_int_type(to_int_type(*aStr1),
     181        6423 :                        to_int_type(static_cast<char_type>(*aStr2)))) {
     182         182 :         return to_int_type(*aStr1) -
     183         182 :                to_int_type(static_cast<char_type>(*aStr2));
     184             :       }
     185             :     }
     186             : 
     187        2020 :     return 0;
     188             :   }
     189             : 
     190             :   // This version assumes that aStr2 is null-terminated and aStr1 has length
     191             :   // aN. If aStr1 ends first (everything so far matched) we return -1; if
     192             :   // aStr2 ends first we return 1.
     193             :   static int
     194         444 :   compareASCIINullTerminated(const char_type* aStr1, size_t aN,
     195             :                              const char* aStr2)
     196             :   {
     197         785 :     for (; aN--; ++aStr1, ++aStr2) {
     198         412 :       if (!*aStr2) {
     199           0 :         return 1;
     200             :       }
     201         412 :       NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character");
     202         412 :       if (!eq_int_type(to_int_type(*aStr1),
     203         412 :                        to_int_type(static_cast<char_type>(*aStr2)))) {
     204          71 :         return to_int_type(*aStr1) -
     205          71 :                to_int_type(static_cast<char_type>(*aStr2));
     206             :       }
     207             :     }
     208             : 
     209          32 :     if (*aStr2) {
     210           8 :       return -1;
     211             :     }
     212             : 
     213          24 :     return 0;
     214             :   }
     215             : 
     216             :   /**
     217             :    * Convert c to its lower-case form, but only if c is in the ASCII
     218             :    * range. Otherwise leave it alone.
     219             :    */
     220             :   static char_type
     221      233637 :   ASCIIToLower(char_type aChar)
     222             :   {
     223      233637 :     if (aChar >= 'A' && aChar <= 'Z') {
     224        1299 :       return char_type(aChar + ('a' - 'A'));
     225             :     }
     226             : 
     227      232338 :     return aChar;
     228             :   }
     229             : 
     230             :   static int
     231      177087 :   compareLowerCaseToASCII(const char_type* aStr1, const char* aStr2, size_t aN)
     232             :   {
     233      327393 :     for (; aN--; ++aStr1, ++aStr2) {
     234      155980 :       NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character");
     235      155980 :       NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'),
     236             :                    "Unexpected uppercase character");
     237      155980 :       char_type lower_s1 = ASCIIToLower(*aStr1);
     238      155980 :       if (lower_s1 != static_cast<char_type>(*aStr2)) {
     239        5674 :         return to_int_type(lower_s1) -
     240        5674 :                to_int_type(static_cast<char_type>(*aStr2));
     241             :       }
     242             :     }
     243             : 
     244       21107 :     return 0;
     245             :   }
     246             : 
     247             :   // This version assumes that aStr2 is null-terminated and aStr1 has length
     248             :   // aN. If aStr1 ends first (everything so far matched) we return -1; if
     249             :   // aStr2 ends first we return 1.
     250             :   static int
     251       78250 :   compareLowerCaseToASCIINullTerminated(const char_type* aStr1,
     252             :                                         size_t aN, const char* aStr2)
     253             :   {
     254       86029 :     for (; aN--; ++aStr1, ++aStr2) {
     255       77666 :       if (!*aStr2) {
     256           9 :         return 1;
     257             :       }
     258       77657 :       NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character");
     259       77657 :       NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'),
     260             :                    "Unexpected uppercase character");
     261       77657 :       char_type lower_s1 = ASCIIToLower(*aStr1);
     262       77657 :       if (lower_s1 != static_cast<char_type>(*aStr2)) {
     263       69878 :         return to_int_type(lower_s1) -
     264       69878 :                to_int_type(static_cast<char_type>(*aStr2));
     265             :       }
     266             :     }
     267             : 
     268         584 :     if (*aStr2) {
     269          36 :       return -1;
     270             :     }
     271             : 
     272         548 :     return 0;
     273             :   }
     274             : 
     275             :   static size_t
     276        7591 :   length(const char_type* aStr)
     277             :   {
     278        7591 :     size_t result = 0;
     279      210069 :     while (!eq(*aStr++, char_type(0))) {
     280      101239 :       ++result;
     281             :     }
     282        7591 :     return result;
     283             :   }
     284             : 
     285             :   static const char_type*
     286        3617 :   find(const char_type* aStr, size_t aN, char_type aChar)
     287             :   {
     288        6929 :     while (aN--) {
     289        3458 :       if (eq(*aStr, aChar)) {
     290         146 :         return aStr;
     291             :       }
     292        3312 :       ++aStr;
     293             :     }
     294             : 
     295         159 :     return 0;
     296             :   }
     297             : };
     298             : 
     299             : template <>
     300             : struct nsCharTraits<char>
     301             : {
     302             :   typedef char           char_type;
     303             :   typedef unsigned char  unsigned_char_type;
     304             :   typedef char16_t      incompatible_char_type;
     305             : 
     306             :   static char_type* const sEmptyBuffer;
     307             : 
     308             :   // integer representation of characters:
     309             : 
     310             :   typedef int int_type;
     311             : 
     312             :   static char_type
     313             :   to_char_type(int_type aChar)
     314             :   {
     315             :     return char_type(aChar);
     316             :   }
     317             : 
     318             :   static int_type
     319       19881 :   to_int_type(char_type aChar)
     320             :   {
     321       19881 :     return int_type(static_cast<unsigned_char_type>(aChar));
     322             :   }
     323             : 
     324             :   static bool
     325             :   eq_int_type(int_type aLhs, int_type aRhs)
     326             :   {
     327             :     return aLhs == aRhs;
     328             :   }
     329             : 
     330             : 
     331             :   // |char_type| comparisons:
     332             : 
     333             :   static bool eq(char_type aLhs, char_type aRhs)
     334             :   {
     335             :     return aLhs == aRhs;
     336             :   }
     337             : 
     338             :   static bool
     339             :   lt(char_type aLhs, char_type aRhs)
     340             :   {
     341             :     return aLhs < aRhs;
     342             :   }
     343             : 
     344             : 
     345             :   // operations on s[n] arrays:
     346             : 
     347             :   static char_type*
     348        4863 :   move(char_type* aStr1, const char_type* aStr2, size_t aN)
     349             :   {
     350             :     return static_cast<char_type*>(memmove(aStr1, aStr2,
     351        4863 :                                            aN * sizeof(char_type)));
     352             :   }
     353             : 
     354             :   static char_type*
     355      149055 :   copy(char_type* aStr1, const char_type* aStr2, size_t aN)
     356             :   {
     357             :     return static_cast<char_type*>(memcpy(aStr1, aStr2,
     358      149055 :                                           aN * sizeof(char_type)));
     359             :   }
     360             : 
     361             :   static char_type*
     362        6453 :   copyASCII(char_type* aStr1, const char* aStr2, size_t aN)
     363             :   {
     364        6453 :     return copy(aStr1, aStr2, aN);
     365             :   }
     366             : 
     367             :   static int
     368      285140 :   compare(const char_type* aStr1, const char_type* aStr2, size_t aN)
     369             :   {
     370      285140 :     return memcmp(aStr1, aStr2, aN);
     371             :   }
     372             : 
     373             :   static int
     374       10523 :   compareASCII(const char_type* aStr1, const char* aStr2, size_t aN)
     375             :   {
     376             : #ifdef DEBUG
     377       48523 :     for (size_t i = 0; i < aN; ++i) {
     378       38000 :       NS_ASSERTION(!(aStr2[i] & ~0x7F), "Unexpected non-ASCII character");
     379             :     }
     380             : #endif
     381       10523 :     return compare(aStr1, aStr2, aN);
     382             :   }
     383             : 
     384             :   // This version assumes that aStr2 is null-terminated and aStr1 has length
     385             :   // aN. If aStr1 ends first (everything so far matched) we return -1; if
     386             :   // aStr2 ends first we return 1.
     387             :   static int
     388         418 :   compareASCIINullTerminated(const char_type* aStr1, size_t aN,
     389             :                              const char* aStr2)
     390             :   {
     391             :     // can't use strcmp here because we don't want to stop when aStr1
     392             :     // contains a null
     393         823 :     for (; aN--; ++aStr1, ++aStr2) {
     394         405 :       if (!*aStr2) {
     395           0 :         return 1;
     396             :       }
     397         405 :       NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character");
     398         405 :       if (*aStr1 != *aStr2) {
     399           0 :         return to_int_type(*aStr1) - to_int_type(*aStr2);
     400             :       }
     401             :     }
     402             : 
     403          13 :     if (*aStr2) {
     404           0 :       return -1;
     405             :     }
     406             : 
     407          13 :     return 0;
     408             :   }
     409             : 
     410             :   /**
     411             :    * Convert c to its lower-case form, but only if c is ASCII.
     412             :    */
     413             :   static char_type
     414       20237 :   ASCIIToLower(char_type aChar)
     415             :   {
     416       20237 :     if (aChar >= 'A' && aChar <= 'Z') {
     417           0 :       return char_type(aChar + ('a' - 'A'));
     418             :     }
     419             : 
     420       20237 :     return aChar;
     421             :   }
     422             : 
     423             :   static int
     424       22297 :   compareLowerCaseToASCII(const char_type* aStr1, const char* aStr2, size_t aN)
     425             :   {
     426       36965 :     for (; aN--; ++aStr1, ++aStr2) {
     427       18647 :       NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character");
     428       18647 :       NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'),
     429             :                    "Unexpected uppercase character");
     430       18647 :       char_type lower_s1 = ASCIIToLower(*aStr1);
     431       18647 :       if (lower_s1 != *aStr2) {
     432        3979 :         return to_int_type(lower_s1) - to_int_type(*aStr2);
     433             :       }
     434             :     }
     435        3650 :     return 0;
     436             :   }
     437             : 
     438             :   // This version assumes that aStr2 is null-terminated and aStr1 has length
     439             :   // aN. If aStr1 ends first (everything so far matched) we return -1; if
     440             :   // aStr2 ends first we return 1.
     441             :   static int
     442        1749 :   compareLowerCaseToASCIINullTerminated(const char_type* aStr1, size_t aN,
     443             :                                         const char* aStr2)
     444             :   {
     445        2295 :     for (; aN--; ++aStr1, ++aStr2) {
     446        1595 :       if (!*aStr2) {
     447           5 :         return 1;
     448             :       }
     449        1590 :       NS_ASSERTION(!(*aStr2 & ~0x7F), "Unexpected non-ASCII character");
     450        1590 :       NS_ASSERTION(!(*aStr2 >= 'A' && *aStr2 <= 'Z'),
     451             :                    "Unexpected uppercase character");
     452        1590 :       char_type lower_s1 = ASCIIToLower(*aStr1);
     453        1590 :       if (lower_s1 != *aStr2) {
     454        1044 :         return to_int_type(lower_s1) - to_int_type(*aStr2);
     455             :       }
     456             :     }
     457             : 
     458         154 :     if (*aStr2) {
     459          28 :       return -1;
     460             :     }
     461             : 
     462         126 :     return 0;
     463             :   }
     464             : 
     465             :   static size_t
     466      118467 :   length(const char_type* aStr)
     467             :   {
     468      118467 :     return strlen(aStr);
     469             :   }
     470             : 
     471             :   static const char_type*
     472        9835 :   find(const char_type* aStr, size_t aN, char_type aChar)
     473             :   {
     474        9835 :     return reinterpret_cast<const char_type*>(memchr(aStr, to_int_type(aChar),
     475        9835 :                                                      aN));
     476             :   }
     477             : };
     478             : 
     479             : template <class InputIterator>
     480             : struct nsCharSourceTraits
     481             : {
     482             :   typedef typename InputIterator::difference_type difference_type;
     483             : 
     484             :   static uint32_t
     485       31724 :   readable_distance(const InputIterator& aFirst, const InputIterator& aLast)
     486             :   {
     487             :     // assumes single fragment
     488       31724 :     return uint32_t(aLast.get() - aFirst.get());
     489             :   }
     490             : 
     491             :   static const typename InputIterator::value_type*
     492       31724 :   read(const InputIterator& aIter)
     493             :   {
     494       31724 :     return aIter.get();
     495             :   }
     496             : 
     497             :   static void
     498             :   advance(InputIterator& aStr, difference_type aN)
     499             :   {
     500             :     aStr.advance(aN);
     501             :   }
     502             : };
     503             : 
     504             : template <class CharT>
     505             : struct nsCharSourceTraits<CharT*>
     506             : {
     507             :   typedef ptrdiff_t difference_type;
     508             : 
     509             :   static uint32_t
     510             :   readable_distance(CharT* aStr)
     511             :   {
     512             :     return uint32_t(nsCharTraits<CharT>::length(aStr));
     513             :     // return numeric_limits<uint32_t>::max();
     514             :   }
     515             : 
     516             :   static uint32_t
     517         172 :   readable_distance(CharT* aFirst, CharT* aLast)
     518             :   {
     519         172 :     return uint32_t(aLast - aFirst);
     520             :   }
     521             : 
     522             :   static const CharT*
     523         172 :   read(CharT* aStr)
     524             :   {
     525         172 :     return aStr;
     526             :   }
     527             : 
     528             :   static void
     529             :   advance(CharT*& aStr, difference_type aN)
     530             :   {
     531             :     aStr += aN;
     532             :   }
     533             : };
     534             : 
     535             : template <class OutputIterator>
     536             : struct nsCharSinkTraits
     537             : {
     538             :   static void
     539       20978 :   write(OutputIterator& aIter, const typename OutputIterator::value_type* aStr,
     540             :         uint32_t aN)
     541             :   {
     542       20978 :     aIter.write(aStr, aN);
     543       20978 :   }
     544             : };
     545             : 
     546             : template <class CharT>
     547             : struct nsCharSinkTraits<CharT*>
     548             : {
     549             :   static void
     550       10918 :   write(CharT*& aIter, const CharT* aStr, uint32_t aN)
     551             :   {
     552       10918 :     nsCharTraits<CharT>::move(aIter, aStr, aN);
     553       10918 :     aIter += aN;
     554       10918 :   }
     555             : };
     556             : 
     557             : #endif // !defined(nsCharTraits_h___)

Generated by: LCOV version 1.13