LCOV - code coverage report
Current view: top level - xpcom/io - nsNativeCharsetUtils.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 132 183 72.1 %
Date: 2017-07-14 16:53:18 Functions: 16 20 80.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
       3             : /* This Source Code Form is subject to the terms of the Mozilla Public
       4             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       5             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       6             : 
       7             : #include "xpcom-private.h"
       8             : 
       9             : //-----------------------------------------------------------------------------
      10             : // XP_MACOSX or ANDROID
      11             : //-----------------------------------------------------------------------------
      12             : #if defined(XP_MACOSX) || defined(ANDROID)
      13             : 
      14             : #include "nsAString.h"
      15             : #include "nsReadableUtils.h"
      16             : #include "nsString.h"
      17             : 
                       // Converts aInput to UTF-16 in aOutput by calling CopyUTF8toUTF16, so in
                       // this XP_MACOSX/ANDROID branch the input is handled as UTF-8.
                       // Always returns NS_OK.
      18             : nsresult
      19             : NS_CopyNativeToUnicode(const nsACString& aInput, nsAString& aOutput)
      20             : {
      21             :   CopyUTF8toUTF16(aInput, aOutput);
      22             :   return NS_OK;
      23             : }
      24             : 
                       // Converts the UTF-16 aInput into aOutput by calling CopyUTF16toUTF8, so in
                       // this XP_MACOSX/ANDROID branch the output is produced as UTF-8.
                       // Always returns NS_OK.
      25             : nsresult
      26             : NS_CopyUnicodeToNative(const nsAString&  aInput, nsACString& aOutput)
      27             : {
      28             :   CopyUTF16toUTF8(aInput, aOutput);
      29             :   return NS_OK;
      30             : }
      31             : 
                       // Intentionally empty in the XP_MACOSX/ANDROID branch: the conversion
                       // functions above keep no global state that would need setting up.
      32             : void
      33             : NS_StartupNativeCharsetUtils()
      34             : {
      35             : }
      36             : 
                       // Intentionally empty in the XP_MACOSX/ANDROID branch: there is no global
                       // state to tear down.
      37             : void
      38             : NS_ShutdownNativeCharsetUtils()
      39             : {
      40             : }
      41             : 
      42             : 
      43             : //-----------------------------------------------------------------------------
      44             : // XP_UNIX
      45             : //-----------------------------------------------------------------------------
      46             : #elif defined(XP_UNIX)
      47             : 
      48             : #include <stdlib.h>   // mbtowc, wctomb
      49             : #include <locale.h>   // setlocale
      50             : #include "mozilla/Mutex.h"
      51             : #include "nscore.h"
      52             : #include "nsAString.h"
      53             : #include "nsReadableUtils.h"
      54             : 
      55             : using namespace mozilla;
      56             : 
      57             : //
      58             : // choose a conversion library.  we used to use mbrtowc/wcrtomb under Linux,
      59             : // but that doesn't work for non-BMP characters whether we use '-fshort-wchar'
      60             : // or not (see bug 206811 and
      61             : // news://news.mozilla.org:119/bajml3$fvr1@ripley.netscape.com). we now use
      62             : // iconv for all platforms where nl_types.h and langinfo.h are present
      63             : // along with iconv.
      64             : //
                       // Exactly one of USE_ICONV / USE_STDCONV ends up defined; each one selects
                       // one of the two nsNativeCharsetConverter implementations further down.
      65             : #if defined(HAVE_ICONV) && defined(HAVE_NL_TYPES_H) && defined(HAVE_LANGINFO_CODESET)
      66             : #define USE_ICONV 1
      67             : #else
      68             : #define USE_STDCONV 1
      69             : #endif
      70             : 
                       // Last-resort native -> UTF-16 conversion: copies one input byte per output
                       // unit, casting each byte through unsigned char so that it is stored as a
                       // char16_t in the range 0..255.
                       //
                       // aInput / aOutput are cursors that are advanced past what was consumed /
                       // written; aInputLeft / aOutputLeft are the remaining element counts and are
                       // decremented in step.  The loop ends as soon as either count reaches zero.
      71             : static void
      72           0 : isolatin1_to_utf16(const char** aInput, uint32_t* aInputLeft,
      73             :                    char16_t** aOutput, uint32_t* aOutputLeft)
      74             : {
      75           0 :   while (*aInputLeft && *aOutputLeft) {
      76           0 :     **aOutput = (unsigned char)** aInput;
      77           0 :     (*aInput)++;
      78           0 :     (*aInputLeft)--;
      79           0 :     (*aOutput)++;
      80           0 :     (*aOutputLeft)--;
      81             :   }
      82           0 : }
      83             : 
                       // Last-resort UTF-16 -> native conversion: writes one output byte per input
                       // unit.  Each char16_t is narrowed with an (unsigned char) cast, so only its
                       // low 8 bits survive.
                       //
                       // aInput / aOutput are cursors that are advanced past what was consumed /
                       // written; aInputLeft / aOutputLeft are the remaining element counts and are
                       // decremented in step.  The loop ends as soon as either count reaches zero.
      84             : static void
      85           0 : utf16_to_isolatin1(const char16_t** aInput, uint32_t* aInputLeft,
      86             :                    char** aOutput, uint32_t* aOutputLeft)
      87             : {
      88           0 :   while (*aInputLeft && *aOutputLeft) {
      89           0 :     **aOutput = (unsigned char)**aInput;
      90           0 :     (*aInput)++;
      91           0 :     (*aInputLeft)--;
      92           0 :     (*aOutput)++;
      93           0 :     (*aOutputLeft)--;
      94             :   }
      95           0 : }
      96             : 
      97             : //-----------------------------------------------------------------------------
      98             : // conversion using iconv
      99             : //-----------------------------------------------------------------------------
     100             : #if defined(USE_ICONV)
     101             : #include <nl_types.h> // CODESET
     102             : #include <langinfo.h> // nl_langinfo
     103             : #include <iconv.h>    // iconv_open, iconv, iconv_close
     104             : #include <errno.h>
     105             : #include "plstr.h"
     106             : 
                       // ICONV_INPUT adapts the `const char**` input argument to what this
                       // platform's iconv() prototype takes: it is passed through unchanged when
                       // HAVE_ICONV_WITH_CONST_INPUT is defined, otherwise it is cast to `char**`.
     107             : #if defined(HAVE_ICONV_WITH_CONST_INPUT)
     108             : #define ICONV_INPUT(x) (x)
     109             : #else
     110             : #define ICONV_INPUT(x) ((char **)x)
     111             : #endif
     112             : 
     113             : // solaris definitely needs this, but we'll enable it by default
     114             : // just in case... but we know for sure that iconv(3) in glibc
     115             : // doesn't need this.
     116             : #if !defined(__GLIBC__)
     117             : #define ENABLE_UTF8_FALLBACK_SUPPORT
     118             : #endif
     119             : 
                       // Value that iconv_open() results are compared against to detect failure;
                       // it is also the "not open" state of every converter descriptor below.
     120             : #define INVALID_ICONV_T ((iconv_t)-1)
     121             : 
                       // Thin wrapper around iconv(3) that hides the const-ness difference of the
                       // input argument (via ICONV_INPUT) and softens one error case.
                       //
                       // The in/out cursors and remaining-byte counts are passed straight through
                       // to iconv, which updates them.  Returns iconv's result, except that a
                       // failure with errno == E2BIG is reported as 0 when at least some output was
                       // produced (i.e. *aOutputLeft shrank during the call).
     122             : static inline size_t
     123       17076 : xp_iconv(iconv_t converter,
     124             :          const char** aInput, size_t* aInputLeft,
     125             :          char** aOutput, size_t* aOutputLeft)
     126             : {
                         // remember the space available before the call so progress can be detected
     127       17076 :   size_t res, outputAvail = *aOutputLeft;
     128       17076 :   res = iconv(converter, ICONV_INPUT(aInput), aInputLeft, aOutput, aOutputLeft);
     129       17076 :   if (res == (size_t)-1) {
     130             :     // on some platforms (e.g., linux) iconv will fail with
     131             :     // E2BIG if it cannot convert _all_ of its input.  it'll
     132             :     // still adjust all of the in/out params correctly, so we
     133             :     // can ignore this error.  the assumption is that we will
     134             :     // be called again to complete the conversion.
     135           0 :     if ((errno == E2BIG) && (*aOutputLeft < outputAvail)) {
     136           0 :       res = 0;
     137             :     }
     138             :   }
     139       17076 :   return res;
     140             : }
     141             : 
                       // Resets |converter| by running xp_iconv with null input/output buffers and
                       // zero sizes.  The result of that call is ignored.
     142             : static inline void
     143       11382 : xp_iconv_reset(iconv_t converter)
     144             : {
     145             :   // NOTE: the man pages on Solaris claim that you can pass nullptr
     146             :   // for all parameters to reset the converter, but beware the
     147             :   // evil Solaris crash if you go down this route >:-)
     148             : 
                         // so instead pass valid pointers to variables that hold null / zero.
     149       11382 :   const char* zero_char_in_ptr  = nullptr;
     150       11382 :   char* zero_char_out_ptr = nullptr;
     151       11382 :   size_t zero_size_in = 0;
     152       11382 :   size_t zero_size_out = 0;
     153             : 
     154             :   xp_iconv(converter,
     155             :            &zero_char_in_ptr,
     156             :            &zero_size_in,
     157             :            &zero_char_out_ptr,
     158       11382 :            &zero_size_out);
     159       11382 : }
     160             : 
                       // Opens a converter for the first (to, from) charset-name pair that
                       // iconv_open accepts.
                       //
                       // to_list / from_list are null-terminated arrays of charset names.  Empty
                       // strings in either list are skipped.  Pairs are tried with to_list as the
                       // outer loop, so earlier entries take precedence.  Returns the opened
                       // descriptor, or INVALID_ICONV_T when no combination could be opened.
     161             : static inline iconv_t
     162           6 : xp_iconv_open(const char** to_list, const char** from_list)
     163             : {
     164             :   iconv_t res;
     165             :   const char** from_name;
     166             :   const char** to_name;
     167             : 
     168             :   // try all possible combinations to locate a converter.
     169           6 :   to_name = to_list;
     170           6 :   while (*to_name) {
     171           6 :     if (**to_name) {
     172           6 :       from_name = from_list;
     173           6 :       while (*from_name) {
     174           6 :         if (**from_name) {
     175           6 :           res = iconv_open(*to_name, *from_name);
     176           6 :           if (res != INVALID_ICONV_T) {
     177           6 :             return res;
     178             :           }
     179             :         }
     180           0 :         from_name++;
     181             :       }
     182             :     }
     183           0 :     to_name++;
     184             :   }
     185             : 
     186           0 :   return INVALID_ICONV_T;
     187             : }
     188             : 
     189             : /*
     190             :  * char16_t[] is NOT a UCS-2 array BUT a UTF-16 string. Therefore, we
     191             :  * have to use UTF-16 with iconv(3) on platforms where it's supported.
     192             :  * However, the way UTF-16 and UCS-2 are interpreted varies across platforms
     193             :  * and implementations of iconv(3). On Tru64, it also depends on an environment
     194             :  * variable (ICONV_BYTEORDER). To avoid the trouble arising from byte-swapping
     195             :  * (bug 208809), we have to try UTF-16LE/BE and UCS-2LE/BE before falling
     196             :  * back to UTF-16 and UCS-2 and variants. We assume that UTF-16 and UCS-2
     197             :  * on systems without UTF-16LE/BE and UCS-2LE/BE have the native endianness,
     198             :  * which isn't the case of glibc 2.1.x, for which we use 'UNICODELITTLE'
     199             :  * and 'UNICODEBIG'. It's also not true of Tru64 V4 when the environment
     200             :  * variable ICONV_BYTEORDER is set to 'big-endian', about which not much
     201             :  * can be done other than adding a note in the release notes. (bug 206811)
     202             :  */
                       // Null-terminated.  xp_iconv_open walks the list front to back, so the
                       // endian-specific names listed first are preferred over the generic ones.
     203             : static const char* UTF_16_NAMES[] = {
     204             : #if defined(IS_LITTLE_ENDIAN)
     205             :   "UTF-16LE",
     206             : #if defined(__GLIBC__)
     207             :   "UNICODELITTLE",
     208             : #endif
     209             :   "UCS-2LE",
     210             : #else
     211             :   "UTF-16BE",
     212             : #if defined(__GLIBC__)
     213             :   "UNICODEBIG",
     214             : #endif
     215             :   "UCS-2BE",
     216             : #endif
     217             :   "UTF-16",
     218             :   "UCS-2",
     219             :   "UCS2",
     220             :   "UCS_2",
     221             :   "ucs-2",
     222             :   "ucs2",
     223             :   "ucs_2",
     224             :   nullptr
     225             : };
     226             : 
     227             : #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
                       // Null-terminated list of spellings of the UTF-8 charset name, handed to
                       // xp_iconv_open when the converters that go through UTF-8 are set up.
                       // Only compiled when the UTF-8 fallback is enabled.
     228             : static const char* UTF_8_NAMES[] = {
     229             :   "UTF-8",
     230             :   "UTF8",
     231             :   "UTF_8",
     232             :   "utf-8",
     233             :   "utf8",
     234             :   "utf_8",
     235             :   nullptr
     236             : };
     237             : #endif
     238             : 
                       // Null-terminated list of spellings of the ISO-8859-1 charset name.  LazyInit
                       // uses it as the native charset list when nl_langinfo(CODESET) returns null.
                       // With glibc only the canonical "ISO-8859-1" spelling is compiled in.
     239             : static const char* ISO_8859_1_NAMES[] = {
     240             :   "ISO-8859-1",
     241             : #if !defined(__GLIBC__)
     242             :   "ISO8859-1",
     243             :   "ISO88591",
     244             :   "ISO_8859_1",
     245             :   "ISO8859_1",
     246             :   "iso-8859-1",
     247             :   "iso8859-1",
     248             :   "iso88591",
     249             :   "iso_8859_1",
     250             :   "iso8859_1",
     251             : #endif
     252             :   nullptr
     253             : };
     254             : 
                       // iconv-based converter between the native charset and UTF-16.
                       //
                       // All state is static and shared: the iconv descriptors, the lock and the
                       // init flags.  An instance acts as a scoped guard around that state: the
                       // constructor takes the lock (if one exists) and runs LazyInit on first use,
                       // and the destructor resets the converters and releases the lock.
     255             : class nsNativeCharsetConverter
     256             : {
     257             : public:
     258             :   nsNativeCharsetConverter();
     259             :   ~nsNativeCharsetConverter();
     260             : 
                         // Both methods take cursor/remaining-count pairs that are updated in place.
                         // Counts are in elements of the respective buffer: bytes for the native
                         // side, char16_t units for the UTF-16 side.
     261             :   nsresult NativeToUnicode(const char** aInput, uint32_t* aInputLeft,
     262             :                            char16_t** aOutput, uint32_t* aOutputLeft);
     263             :   nsresult UnicodeToNative(const char16_t** aInput, uint32_t* aInputLeft,
     264             :                            char** aOutput, uint32_t* aOutputLeft);
     265             : 
     266             :   static void GlobalInit();
     267             :   static void GlobalShutdown();
     268             :   static bool IsNativeUTF8();
     269             : 
     270             : private:
                         // Direct converters; INVALID_ICONV_T while not open.
     271             :   static iconv_t gNativeToUnicode;
     272             :   static iconv_t gUnicodeToNative;
     273             : #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
                         // Two-step converters via UTF-8, opened by LazyInit only when the
                         // corresponding direct converter could not be opened.
     274             :   static iconv_t gNativeToUTF8;
     275             :   static iconv_t gUTF8ToNative;
     276             :   static iconv_t gUnicodeToUTF8;
     277             :   static iconv_t gUTF8ToUnicode;
     278             : #endif
                         // gLock is null until GlobalInit and again after GlobalShutdown.
     279             :   static Mutex*  gLock;
     280             :   static bool    gInitialized;
     281             :   static bool    gIsNativeUTF8;
     282             : 
     283             :   static void LazyInit();
     284             : 
                         // Lock/Unlock are no-ops while gLock is null.
     285        5691 :   static void Lock()
     286             :   {
     287        5691 :     if (gLock) {
     288        5678 :       gLock->Lock();
     289             :     }
     290        5691 :   }
     291        5691 :   static void Unlock()
     292             :   {
     293        5691 :     if (gLock) {
     294        5678 :       gLock->Unlock();
     295             :     }
     296        5691 :   }
     297             : };
     298             : 
                       // Definitions of the static members.  Every descriptor starts out as
                       // INVALID_ICONV_T, the lock as null and both flags as false.
     299             : iconv_t nsNativeCharsetConverter::gNativeToUnicode = INVALID_ICONV_T;
     300             : iconv_t nsNativeCharsetConverter::gUnicodeToNative = INVALID_ICONV_T;
     301             : #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
     302             : iconv_t nsNativeCharsetConverter::gNativeToUTF8    = INVALID_ICONV_T;
     303             : iconv_t nsNativeCharsetConverter::gUTF8ToNative    = INVALID_ICONV_T;
     304             : iconv_t nsNativeCharsetConverter::gUnicodeToUTF8   = INVALID_ICONV_T;
     305             : iconv_t nsNativeCharsetConverter::gUTF8ToUnicode   = INVALID_ICONV_T;
     306             : #endif
     307             : Mutex*  nsNativeCharsetConverter::gLock            = nullptr;
     308             : bool    nsNativeCharsetConverter::gInitialized     = false;
     309             : bool    nsNativeCharsetConverter::gIsNativeUTF8    = false;
     310             : 
                       // One-time setup of the shared iconv descriptors.
                       //
                       // Steps: (1) look up the native charset name with nl_langinfo(CODESET),
                       // falling back to the ISO-8859-1 name list when that returns null;
                       // (2) record whether that name is "UTF-8" in gIsNativeUTF8;
                       // (3) open the native <-> UTF-16 converters, plus the via-UTF-8 pairs where
                       // the UTF-8 fallback is compiled in and a direct converter is missing;
                       // (4) push one dummy character through the to-UTF-16 converters so that any
                       // byte order mark is emitted now; (5) set gInitialized.
     311             : void
     312           3 : nsNativeCharsetConverter::LazyInit()
     313             : {
     314             :   // LazyInit may be called before NS_StartupNativeCharsetUtils, but
     315             :   // the setlocale it does has to be called before nl_langinfo. Like in
     316             :   // NS_StartupNativeCharsetUtils, assume we are called early enough that
     317             :   // we are the first to care about the locale's charset.
     318           3 :   if (!gLock) {
     319           1 :     setlocale(LC_CTYPE, "");
     320             :   }
                         // blank_list is a one-entry, null-terminated list whose single slot is
                         // filled with the detected charset name below.
     321           3 :   const char* blank_list[] = { "", nullptr };
     322           3 :   const char** native_charset_list = blank_list;
     323           3 :   const char* native_charset = nl_langinfo(CODESET);
     324           3 :   if (!native_charset) {
     325           0 :     NS_ERROR("native charset is unknown");
     326             :     // fallback to ISO-8859-1
     327           0 :     native_charset_list = ISO_8859_1_NAMES;
     328             :   } else {
     329           3 :     native_charset_list[0] = native_charset;
     330             :   }
     331             : 
     332             :   // Most, if not all, Unixen supporting UTF-8 and nl_langinfo(CODESET)
     333             :   // return 'UTF-8' (or 'utf-8')
                         // NOTE(review): native_charset can still be null here (see the branch
                         // above); this relies on PL_strcasecmp tolerating a null argument -- confirm.
     334           3 :   if (!PL_strcasecmp(native_charset, "UTF-8")) {
     335           3 :     gIsNativeUTF8 = true;
     336             :   }
     337             : 
     338           3 :   gNativeToUnicode = xp_iconv_open(UTF_16_NAMES, native_charset_list);
     339           3 :   gUnicodeToNative = xp_iconv_open(native_charset_list, UTF_16_NAMES);
     340             : 
     341             : #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
                         // a missing direct converter is replaced by a pair that goes through UTF-8
     342             :   if (gNativeToUnicode == INVALID_ICONV_T) {
     343             :     gNativeToUTF8 = xp_iconv_open(UTF_8_NAMES, native_charset_list);
     344             :     gUTF8ToUnicode = xp_iconv_open(UTF_16_NAMES, UTF_8_NAMES);
     345             :     NS_ASSERTION(gNativeToUTF8 != INVALID_ICONV_T, "no native to utf-8 converter");
     346             :     NS_ASSERTION(gUTF8ToUnicode != INVALID_ICONV_T, "no utf-8 to utf-16 converter");
     347             :   }
     348             :   if (gUnicodeToNative == INVALID_ICONV_T) {
     349             :     gUnicodeToUTF8 = xp_iconv_open(UTF_8_NAMES, UTF_16_NAMES);
     350             :     gUTF8ToNative = xp_iconv_open(native_charset_list, UTF_8_NAMES);
     351             :     NS_ASSERTION(gUnicodeToUTF8 != INVALID_ICONV_T, "no utf-16 to utf-8 converter");
     352             :     NS_ASSERTION(gUTF8ToNative != INVALID_ICONV_T, "no utf-8 to native converter");
     353             :   }
     354             : #else
     355           3 :   NS_ASSERTION(gNativeToUnicode != INVALID_ICONV_T, "no native to utf-16 converter");
     356           3 :   NS_ASSERTION(gUnicodeToNative != INVALID_ICONV_T, "no utf-16 to native converter");
     357             : #endif
     358             : 
     359             :   /*
     360             :    * On Solaris 8 (and newer?), the iconv modules converting to UCS-2
     361             :    * prepend a byte order mark unicode character (BOM, u+FEFF) during
     362             :    * the first use of the iconv converter. The same is the case of
     363             :    * glibc 2.2.9x and Tru64 V5 (see bug 208809) when 'UTF-16' is used.
     364             :    * However, we use 'UTF-16LE/BE' in both cases, instead so that we
     365             :    * should be safe. But just in case...
     366             :    *
     367             :    * This dummy conversion gets rid of the BOMs and fixes bug 153562.
     368             :    */
                         // one input byte; the 4-byte output buffer leaves room for two UTF-16 units
     369           3 :   char dummy_input[1] = { ' ' };
     370             :   char dummy_output[4];
     371             : 
     372           3 :   if (gNativeToUnicode != INVALID_ICONV_T) {
     373           3 :     const char* input = dummy_input;
     374           3 :     size_t input_left = sizeof(dummy_input);
     375           3 :     char* output = dummy_output;
     376           3 :     size_t output_left = sizeof(dummy_output);
     377             : 
     378           3 :     xp_iconv(gNativeToUnicode, &input, &input_left, &output, &output_left);
     379             :   }
     380             : #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
     381             :   if (gUTF8ToUnicode != INVALID_ICONV_T) {
     382             :     const char* input = dummy_input;
     383             :     size_t input_left = sizeof(dummy_input);
     384             :     char* output = dummy_output;
     385             :     size_t output_left = sizeof(dummy_output);
     386             : 
     387             :     xp_iconv(gUTF8ToUnicode, &input, &input_left, &output, &output_left);
     388             :   }
     389             : #endif
     390             : 
     391           3 :   gInitialized = true;
     392           3 : }
     393             : 
                       // Allocates the mutex that Lock()/Unlock() use.
                       // NOTE(review): a gLock that already exists is overwritten without being
                       // deleted -- presumably this runs once per startup/shutdown cycle; confirm.
     394             : void
     395           3 : nsNativeCharsetConverter::GlobalInit()
     396             : {
     397           3 :   gLock = new Mutex("nsNativeCharsetConverter.gLock");
     398           3 : }
     399             : 
                       // Tears down the shared state: deletes the lock, closes every descriptor
                       // that is open and puts it back to INVALID_ICONV_T, and clears gInitialized
                       // so that a later use runs LazyInit again.  gIsNativeUTF8 is left untouched.
     400             : void
     401           0 : nsNativeCharsetConverter::GlobalShutdown()
     402             : {
     403           0 :   delete gLock;
     404           0 :   gLock = nullptr;
     405             : 
     406           0 :   if (gNativeToUnicode != INVALID_ICONV_T) {
     407           0 :     iconv_close(gNativeToUnicode);
     408           0 :     gNativeToUnicode = INVALID_ICONV_T;
     409             :   }
     410             : 
     411           0 :   if (gUnicodeToNative != INVALID_ICONV_T) {
     412           0 :     iconv_close(gUnicodeToNative);
     413           0 :     gUnicodeToNative = INVALID_ICONV_T;
     414             :   }
     415             : 
     416             : #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
     417             :   if (gNativeToUTF8 != INVALID_ICONV_T) {
     418             :     iconv_close(gNativeToUTF8);
     419             :     gNativeToUTF8 = INVALID_ICONV_T;
     420             :   }
     421             :   if (gUTF8ToNative != INVALID_ICONV_T) {
     422             :     iconv_close(gUTF8ToNative);
     423             :     gUTF8ToNative = INVALID_ICONV_T;
     424             :   }
     425             :   if (gUnicodeToUTF8 != INVALID_ICONV_T) {
     426             :     iconv_close(gUnicodeToUTF8);
     427             :     gUnicodeToUTF8 = INVALID_ICONV_T;
     428             :   }
     429             :   if (gUTF8ToUnicode != INVALID_ICONV_T) {
     430             :     iconv_close(gUTF8ToUnicode);
     431             :     gUTF8ToUnicode = INVALID_ICONV_T;
     432             :   }
     433             : #endif
     434             : 
     435           0 :   gInitialized = false;
     436           0 : }
     437             : 
                       // Takes the shared lock (a no-op while gLock is null) and runs LazyInit on
                       // first use.  The lock is not released here; the destructor does that.
     438        5691 : nsNativeCharsetConverter::nsNativeCharsetConverter()
     439             : {
     440        5691 :   Lock();
     441        5691 :   if (!gInitialized) {
     442           3 :     LazyInit();
     443             :   }
     444        5691 : }
     445             : 
                       // Resets every open converter, then releases the lock taken by the
                       // constructor.
     446        5691 : nsNativeCharsetConverter::~nsNativeCharsetConverter()
     447             : {
     448             :   // reset converters for next time
     449        5691 :   if (gNativeToUnicode != INVALID_ICONV_T) {
     450        5691 :     xp_iconv_reset(gNativeToUnicode);
     451             :   }
     452        5691 :   if (gUnicodeToNative != INVALID_ICONV_T) {
     453        5691 :     xp_iconv_reset(gUnicodeToNative);
     454             :   }
     455             : #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
     456             :   if (gNativeToUTF8 != INVALID_ICONV_T) {
     457             :     xp_iconv_reset(gNativeToUTF8);
     458             :   }
     459             :   if (gUTF8ToNative != INVALID_ICONV_T) {
     460             :     xp_iconv_reset(gUTF8ToNative);
     461             :   }
     462             :   if (gUnicodeToUTF8 != INVALID_ICONV_T) {
     463             :     xp_iconv_reset(gUnicodeToUTF8);
     464             :   }
     465             :   if (gUTF8ToUnicode != INVALID_ICONV_T) {
     466             :     xp_iconv_reset(gUTF8ToUnicode);
     467             :   }
     468             : #endif
     469        5691 :   Unlock();
     470        5691 : }
     471             : 
                       // Converts native-charset bytes to UTF-16.
                       //
                       // aInput / aInputLeft:   cursor into the input and the number of bytes left.
                       // aOutput / aOutputLeft: cursor into the output and the number of char16_t
                       //                        units of space left.
                       // All four are updated to reflect what was consumed and produced.
                       //
                       // Uses the direct converter when it is open, otherwise (where compiled in)
                       // the two-step route via UTF-8.  If the chosen route fails, or no converter
                       // is available, whatever input remains is widened byte-by-byte by
                       // isolatin1_to_utf16.  Every path returns NS_OK.
     472             : nsresult
     473        3003 : nsNativeCharsetConverter::NativeToUnicode(const char** aInput,
     474             :                                           uint32_t* aInputLeft,
     475             :                                           char16_t** aOutput,
     476             :                                           uint32_t* aOutputLeft)
     477             : {
     478        3003 :   size_t res = 0;
     479        3003 :   size_t inLeft = (size_t)*aInputLeft;
                         // iconv counts bytes, the caller counts char16_t units: hence the factor 2
     480        3003 :   size_t outLeft = (size_t)*aOutputLeft * 2;
     481             : 
     482        3003 :   if (gNativeToUnicode != INVALID_ICONV_T) {
     483             : 
     484        3003 :     res = xp_iconv(gNativeToUnicode, aInput, &inLeft, (char**)aOutput, &outLeft);
     485             : 
     486        3003 :     *aInputLeft = inLeft;
     487        3003 :     *aOutputLeft = outLeft / 2;
     488        3003 :     if (res != (size_t)-1) {
     489        3003 :       return NS_OK;
     490             :     }
     491             : 
     492           0 :     NS_WARNING("conversion from native to utf-16 failed");
     493             : 
     494             :     // reset converter
     495           0 :     xp_iconv_reset(gNativeToUnicode);
     496             :   }
     497             : #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
     498             :   else if ((gNativeToUTF8 != INVALID_ICONV_T) &&
     499             :            (gUTF8ToUnicode != INVALID_ICONV_T)) {
     500             :     // convert first to UTF8, then from UTF8 to UCS2
                           // work on a local copy of the input cursor; *aInput is advanced once,
                           // after the loop, by the number of bytes consumed
     501             :     const char* in = *aInput;
     502             : 
     503             :     char ubuf[1024];
     504             : 
     505             :     // we assume we're always called with enough space in |aOutput|,
     506             :     // so convert many chars at a time...
     507             :     while (inLeft) {
     508             :       char* p = ubuf;
     509             :       size_t n = sizeof(ubuf);
     510             :       res = xp_iconv(gNativeToUTF8, &in, &inLeft, &p, &n);
     511             :       if (res == (size_t)-1) {
     512             :         NS_ERROR("conversion from native to utf-8 failed");
     513             :         break;
     514             :       }
     515             :       NS_ASSERTION(outLeft > 0, "bad assumption");
                             // rewind to the start of ubuf; n becomes the number of bytes written
     516             :       p = ubuf;
     517             :       n = sizeof(ubuf) - n;
     518             :       res = xp_iconv(gUTF8ToUnicode, (const char**)&p, &n,
     519             :                      (char**)aOutput, &outLeft);
     520             :       if (res == (size_t)-1) {
     521             :         NS_ERROR("conversion from utf-8 to utf-16 failed");
     522             :         break;
     523             :       }
     524             :     }
     525             : 
     526             :     (*aInput) += (*aInputLeft - inLeft);
     527             :     *aInputLeft = inLeft;
     528             :     *aOutputLeft = outLeft / 2;
     529             : 
     530             :     if (res != (size_t)-1) {
     531             :       return NS_OK;
     532             :     }
     533             : 
     534             :     // reset converters
     535             :     xp_iconv_reset(gNativeToUTF8);
     536             :     xp_iconv_reset(gUTF8ToUnicode);
     537             :   }
     538             : #endif
     539             : 
     540             :   // fallback: zero-pad and hope for the best
     541             :   // XXX This is lame and we have to do better.
     542           0 :   isolatin1_to_utf16(aInput, aInputLeft, aOutput, aOutputLeft);
     543             : 
     544           0 :   return NS_OK;
     545             : }
     546             : 
                       // Converts UTF-16 to native-charset bytes.
                       //
                       // aInput / aInputLeft:   cursor into the input and the number of char16_t
                       //                        units left.
                       // aOutput / aOutputLeft: cursor into the output and the number of bytes of
                       //                        space left.
                       // All four are updated to reflect what was consumed and produced.
                       //
                       // Uses the direct converter when it is open, otherwise (where compiled in)
                       // the two-step route via UTF-8, one char16_t unit at a time.  If the chosen
                       // route fails, or no converter is available, whatever input remains is
                       // narrowed unit-by-unit by utf16_to_isolatin1.  Every path returns NS_OK.
     547             : nsresult
     548        2688 : nsNativeCharsetConverter::UnicodeToNative(const char16_t** aInput,
     549             :                                           uint32_t* aInputLeft,
     550             :                                           char** aOutput,
     551             :                                           uint32_t* aOutputLeft)
     552             : {
     553        2688 :   size_t res = 0;
                         // iconv counts bytes, the caller counts char16_t units: hence the factor 2
     554        2688 :   size_t inLeft = (size_t)*aInputLeft * 2;
     555        2688 :   size_t outLeft = (size_t)*aOutputLeft;
     556             : 
     557        2688 :   if (gUnicodeToNative != INVALID_ICONV_T) {
     558        2688 :     res = xp_iconv(gUnicodeToNative, (const char**)aInput, &inLeft,
     559        2688 :                    aOutput, &outLeft);
     560             : 
     561        2688 :     *aInputLeft = inLeft / 2;
     562        2688 :     *aOutputLeft = outLeft;
     563        2688 :     if (res != (size_t)-1) {
     564        2688 :       return NS_OK;
     565             :     }
     566             : 
     567           0 :     NS_ERROR("iconv failed");
     568             : 
     569             :     // reset converter
     570           0 :     xp_iconv_reset(gUnicodeToNative);
     571             :   }
     572             : #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
     573             :   else if ((gUnicodeToUTF8 != INVALID_ICONV_T) &&
     574             :            (gUTF8ToNative != INVALID_ICONV_T)) {
                           // work on a local byte cursor; *aInput is advanced once, after the loop
     575             :     const char* in = (const char*)*aInput;
     576             : 
     577             :     char ubuf[6]; // max utf-8 char length (really only needs to be 4 bytes)
     578             : 
     579             :     // convert one uchar at a time...
     580             :     while (inLeft && outLeft) {
     581             :       char* p = ubuf;
     582             :       size_t n = sizeof(ubuf), one_uchar = sizeof(char16_t);
     583             :       res = xp_iconv(gUnicodeToUTF8, &in, &one_uchar, &p, &n);
     584             :       if (res == (size_t)-1) {
     585             :         NS_ERROR("conversion from utf-16 to utf-8 failed");
     586             :         break;
     587             :       }
                             // rewind to the start of ubuf; n becomes the number of bytes written
     588             :       p = ubuf;
     589             :       n = sizeof(ubuf) - n;
     590             :       res = xp_iconv(gUTF8ToNative, (const char**)&p, &n, aOutput, &outLeft);
     591             :       if (res == (size_t)-1) {
     592             :         if (errno == E2BIG) {
     593             :           // not enough room for last uchar... back up and return.
                                 // NOTE(review): |in| is not read again after this point; what
                                 // actually leaves this unit unconsumed is that inLeft is not
                                 // decremented before the break.
     594             :           in -= sizeof(char16_t);
     595             :           res = 0;
     596             :         } else {
     597             :           NS_ERROR("conversion from utf-8 to native failed");
     598             :         }
     599             :         break;
     600             :       }
     601             :       inLeft -= sizeof(char16_t);
     602             :     }
     603             : 
                           // inLeft is in bytes; /2 turns it back into char16_t units
     604             :     (*aInput) += (*aInputLeft - inLeft / 2);
     605             :     *aInputLeft = inLeft / 2;
     606             :     *aOutputLeft = outLeft;
     607             :     if (res != (size_t)-1) {
     608             :       return NS_OK;
     609             :     }
     610             : 
     611             :     // reset converters
     612             :     xp_iconv_reset(gUnicodeToUTF8);
     613             :     xp_iconv_reset(gUTF8ToNative);
     614             :   }
     615             : #endif
     616             : 
     617             :   // fallback: truncate and hope for the best
     618             :   // XXX This is lame and we have to do better.
     619           0 :   utf16_to_isolatin1(aInput, aInputLeft, aOutput, aOutputLeft);
     620             : 
     621           0 :   return NS_OK;
     622             : }
     623             : 
     624             : bool
     625        2655 : nsNativeCharsetConverter::IsNativeUTF8()
     626             : {
                         // Returns the gIsNativeUTF8 flag.  If gInitialized is not yet set,
                         // LazyInit() is run first between Lock() and Unlock(); gInitialized
                         // is tested again once the lock is held, so LazyInit() is skipped
                         // if the flag is already set by then.
                         //
                         // NOTE(review): the first read of gInitialized happens outside the
                         // lock; whether that is safe depends on how gInitialized is
                         // declared, which is not visible from here -- confirm.
     627        2655 :   if (!gInitialized) {
     628           0 :     Lock();
     629           0 :     if (!gInitialized) {
     630           0 :       LazyInit();
     631             :     }
     632           0 :     Unlock();
     633             :   }
     634        2655 :   return gIsNativeUTF8;
     635             : }
     636             : 
     637             : #endif // USE_ICONV
     638             : 
     639             : //-----------------------------------------------------------------------------
     640             : // conversion using mb[r]towc/wc[r]tomb
     641             : //-----------------------------------------------------------------------------
     642             : #if defined(USE_STDCONV)
     643             : #if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
     644             : #include <wchar.h>    // mbrtowc, wcrtomb
     645             : #endif
     646             : 
     647             : class nsNativeCharsetConverter
     648             : {
                         // Converter between native multibyte text and UTF-16 code units
                         // (char16_t) built on mb[r]towc / wc[r]tomb.  One instance carries
                         // the mbstate_t used by the restartable variants when they are
                         // available.
     649             : public:
     650             :   nsNativeCharsetConverter();
     651             : 
                         // Both conversion methods take in/out arguments: the input and
                         // output pointers are advanced and the "left" counters are
                         // decremented as data is consumed and produced.  aInputLeft and
                         // aOutputLeft count bytes on the native side and char16_t units
                         // on the Unicode side.  Both methods always return NS_OK.
     652             :   nsresult NativeToUnicode(const char** aInput, uint32_t* aInputLeft,
     653             :                            char16_t** aOutput, uint32_t* aOutputLeft);
     654             :   nsresult UnicodeToNative(const char16_t** aInput, uint32_t* aInputLeft,
     655             :                            char** aOutput, uint32_t* aOutputLeft);
     656             : 
                         // GlobalInit() probes whether wchar_t holds Unicode values and
                         // stores the answer in gWCharIsUnicode.  GlobalShutdown() has
                         // nothing to release.  IsNativeUTF8() always returns false in
                         // this implementation.
     657             :   static void GlobalInit();
     658             :   static void GlobalShutdown() { }
     659             :   static bool IsNativeUTF8();
     660             : 
     661             : private:
                         // true when GlobalInit() found that converting 'a' to wchar_t
                         // kept its numeric value; selects the wchar_t-based code paths.
     662             :   static bool gWCharIsUnicode;
     663             : 
     664             : #if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
                         // conversion state handed to mbrtowc() / wcrtomb(); zeroed by the
                         // constructor.
     665             :   mbstate_t ps;
     666             : #endif
     667             : };
     668             : 
                       // Starts out false; GlobalInit() assigns the real value.
     669             : bool nsNativeCharsetConverter::gWCharIsUnicode = false;
     670             : 
     671             : nsNativeCharsetConverter::nsNativeCharsetConverter()
     672             : {
                         // Zero-fills the conversion state `ps` so the first call to
                         // mbrtowc() / wcrtomb() starts from a cleared state.  When neither
                         // function is available there is no state member and nothing to do.
     673             : #if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
     674             :   memset(&ps, 0, sizeof(ps));
     675             : #endif
     676             : }
     677             : 
     678             : void
     679             : nsNativeCharsetConverter::GlobalInit()
     680             : {
                         // Sets gWCharIsUnicode by converting the single character 'a' with
                         // mbtowc() and checking that the result still equals 'a'.
                         //
     681             :   // verify that wchar_t for the current locale is actually unicode.
     682             :   // if it is not, then we should avoid calling mbtowc/wctomb and
     683             :   // just fallback on zero-pad/truncation conversion.
     684             :   //
     685             :   // this test cannot be done at build time because the encoding of
     686             :   // wchar_t may depend on the runtime locale.  sad, but true!!
     687             :   //
     688             :   // so, if wchar_t is unicode then converting an ASCII character
     689             :   // to wchar_t should not change its numeric value.  we'll just
     690             :   // check what happens with the ASCII 'a' character.
     691             :   //
     692             :   // this test is not perfect... obviously, it could yield false
     693             :   // positives, but then at least ASCII text would be converted
     694             :   // properly (or maybe just the 'a' character) -- oh well :(
     695             : 
     696             :   char a = 'a';
                         // NOTE(review): w is written through a wchar_t* but read back as an
                         // unsigned int.  If wchar_t is narrower than unsigned int, the
                         // comparison below presumably depends on byte order -- confirm for
                         // the targets that build this branch.
     697             :   unsigned int w = 0;
     698             : 
     699             :   int res = mbtowc((wchar_t*)&w, &a, 1);
     700             : 
                         // success requires both that mbtowc() did not fail and that the
                         // numeric value survived the conversion.
     701             :   gWCharIsUnicode = (res != -1 && w == 'a');
     702             : 
     703             : #ifdef DEBUG
     704             :   if (!gWCharIsUnicode) {
     705             :     NS_WARNING("wchar_t is not unicode (unicode conversion will be lossy)");
     706             :   }
     707             : #endif
     708             : }
     709             : 
     710             : nsresult
     711             : nsNativeCharsetConverter::NativeToUnicode(const char** aInput,
     712             :                                           uint32_t* aInputLeft,
     713             :                                           char16_t** aOutput,
     714             :                                           uint32_t* aOutputLeft)
     715             : {
                         // Converts native multibyte bytes at *aInput into char16_t units at
                         // *aOutput, one character per iteration, until either the input or
                         // the output space is exhausted.  The four in/out arguments are
                         // updated to reflect what was consumed and produced.  Always
                         // returns NS_OK.
     716             :   if (gWCharIsUnicode) {
     717             :     int incr;
     718             : 
     719             :     // cannot use wchar_t here since it may have been redefined (e.g.,
     720             :     // via -fshort-wchar).  hopefully, sizeof(tmp) is sufficient XP.
     721             :     unsigned int tmp = 0;
     722             :     while (*aInputLeft && *aOutputLeft) {
     723             : #ifdef HAVE_MBRTOWC
     724             :       incr = (int)mbrtowc((wchar_t*)&tmp, *aInput, *aInputLeft, &ps);
     725             : #else
     726             :       // XXX is this thread-safe?
     727             :       incr = (int)mbtowc((wchar_t*)&tmp, *aInput, *aInputLeft);
     728             : #endif
                             // any negative result after the int cast is treated as a
                             // failure: the current byte is copied through unchanged and
                             // exactly one byte is consumed.
     729             :       if (incr < 0) {
     730             :         NS_WARNING("mbtowc failed: possible charset mismatch");
     731             :         // zero-pad and hope for the best
     732             :         tmp = (unsigned char)**aInput;
     733             :         incr = 1;
     734             :       }
                             // NOTE(review): mbrtowc()/mbtowc() return 0 when the decoded
                             // character is the null character.  In that case incr is 0, so
                             // the input is not advanced while an output unit is still
                             // written on every iteration -- confirm that embedded NUL bytes
                             // cannot reach this loop.
                             //
                             // the cast keeps only the low 16 bits of tmp; values above
                             // 0xFFFF are not split into surrogate pairs.
     735             :       ** aOutput = (char16_t)tmp;
     736             :       (*aInput) += incr;
     737             :       (*aInputLeft) -= incr;
     738             :       (*aOutput)++;
     739             :       (*aOutputLeft)--;
     740             :     }
     741             :   } else {
     742             :     // wchar_t isn't unicode, so the best we can do is treat the
     743             :     // input as if it is isolatin1 :(
     744             :     isolatin1_to_utf16(aInput, aInputLeft, aOutput, aOutputLeft);
     745             :   }
     746             : 
     747             :   return NS_OK;
     748             : }
     749             : 
     750             : nsresult
     751             : nsNativeCharsetConverter::UnicodeToNative(const char16_t** aInput,
     752             :                                           uint32_t* aInputLeft,
     753             :                                           char** aOutput,
     754             :                                           uint32_t* aOutputLeft)
     755             : {
                         // Converts char16_t units at *aInput into native multibyte bytes at
                         // *aOutput, one unit per iteration.  The four in/out arguments are
                         // updated to reflect what was consumed and produced.  Always
                         // returns NS_OK, even when input remains unconverted.
     756             :   if (gWCharIsUnicode) {
     757             :     int incr;
     758             : 
                           // stop as soon as fewer than MB_CUR_MAX bytes of output remain, so
                           // a single converted character always fits; the caller sees the
                           // remaining input through *aInputLeft.
     759             :     while (*aInputLeft && *aOutputLeft >= MB_CUR_MAX) {
                             // each UTF-16 code unit is converted on its own; the two halves
                             // of a surrogate pair are not combined before the call.
     760             : #ifdef HAVE_WCRTOMB
     761             :       incr = (int)wcrtomb(*aOutput, (wchar_t)**aInput, &ps);
     762             : #else
     763             :       // XXX is this thread-safe?
     764             :       incr = (int)wctomb(*aOutput, (wchar_t)**aInput);
     765             : #endif
     766             :       if (incr < 0) {
                               // NOTE(review): the warning text names mbtowc, but the call
                               // that failed here is wcrtomb()/wctomb().
     767             :         NS_WARNING("mbtowc failed: possible charset mismatch");
     768             :         ** aOutput = (unsigned char)**aInput; // truncate
     769             :         incr = 1;
     770             :       }
     771             :       // most likely we're dead anyways if this assertion should fire
     772             :       NS_ASSERTION(uint32_t(incr) <= *aOutputLeft, "wrote beyond end of string");
     773             :       (*aOutput) += incr;
     774             :       (*aOutputLeft) -= incr;
     775             :       (*aInput)++;
     776             :       (*aInputLeft)--;
     777             :     }
     778             :   } else {
     779             :     // wchar_t isn't unicode, so the best we can do is treat the
     780             :     // input as if it is isolatin1 :(
     781             :     utf16_to_isolatin1(aInput, aInputLeft, aOutput, aOutputLeft);
     782             :   }
     783             : 
     784             :   return NS_OK;
     785             : }
     786             : 
     787             : // XXX : for now, return false
     788             : bool
     789             : nsNativeCharsetConverter::IsNativeUTF8()
     790             : {
                         // The mb[r]towc/wc[r]tomb implementation makes no attempt to detect
                         // a UTF-8 native charset; it unconditionally reports false.
     791             :   return false;
     792             : }
     793             : 
     794             : #endif // USE_STDCONV
     795             : 
     796             : //-----------------------------------------------------------------------------
     797             : // API implementation
     798             : //-----------------------------------------------------------------------------
     799             : 
     800             : nsresult
     801        3003 : NS_CopyNativeToUnicode(const nsACString& aInput, nsAString& aOutput)
     802             : {
                         // Replaces the contents of aOutput with aInput converted from the
                         // native charset through nsNativeCharsetConverter.  Returns
                         // NS_ERROR_OUT_OF_MEMORY if the output buffer cannot be sized,
                         // otherwise whatever NativeToUnicode() returns.
     803        3003 :   aOutput.Truncate();
     804             : 
     805        3003 :   uint32_t inputLen = aInput.Length();
     806             : 
     807        3003 :   nsACString::const_iterator iter;
     808        3003 :   aInput.BeginReading(iter);
     809             : 
     810             :   //
     811             :   // OPTIMIZATION: preallocate space for largest possible result; convert
     812             :   // directly into the result buffer to avoid intermediate buffer copy.
     813             :   //
     814             :   // this will generally result in a larger allocation, but that seems
     815             :   // better than an extra buffer copy.
     816             :   //
     817        3003 :   if (!aOutput.SetLength(inputLen, fallible)) {
     818           0 :     return NS_ERROR_OUT_OF_MEMORY;
     819             :   }
     820        3003 :   nsAString::iterator out_iter;
     821        3003 :   aOutput.BeginWriting(out_iter);
     822             : 
                         // output cursor and remaining capacity, counted in char16_t units
     823        3003 :   char16_t* result = out_iter.get();
     824        3003 :   uint32_t resultLeft = inputLen;
     825             : 
                         // input cursor and remaining length, counted in bytes
     826        3003 :   const char* buf = iter.get();
     827        3003 :   uint32_t bufLeft = inputLen;
     828             : 
     829        6006 :   nsNativeCharsetConverter conv;
     830        3003 :   nsresult rv = conv.NativeToUnicode(&buf, &bufLeft, &result, &resultLeft);
     831        3003 :   if (NS_SUCCEEDED(rv)) {
     832        3003 :     NS_ASSERTION(bufLeft == 0, "did not consume entire input buffer");
                           // shrink to the number of units actually written.  On failure
                           // aOutput keeps its preallocated length of inputLen.
     833        3003 :     aOutput.SetLength(inputLen - resultLeft);
     834             :   }
     835        3003 :   return rv;
     836             : }
     837             : 
     838             : nsresult
     839        2688 : NS_CopyUnicodeToNative(const nsAString& aInput, nsACString& aOutput)
     840             : {
                         // Replaces the contents of aOutput with aInput converted to the
                         // native charset.  The input is converted in chunks through a
                         // 4096-byte stack buffer and each chunk is appended to aOutput.
                         // Returns the converter's error code if a chunk fails, otherwise
                         // NS_OK.
     841        2688 :   aOutput.Truncate();
     842             : 
     843        2688 :   nsAString::const_iterator iter, end;
     844        2688 :   aInput.BeginReading(iter);
     845        2688 :   aInput.EndReading(end);
     846             : 
     847             :   // cannot easily avoid intermediate buffer copy.
     848             :   char temp[4096];
     849             : 
     850        5376 :   nsNativeCharsetConverter conv;
     851             : 
     852        2688 :   const char16_t* buf = iter.get();
     853        2688 :   uint32_t bufLeft = Distance(iter, end);
                         // NOTE(review): termination relies on UnicodeToNative() reducing
                         // bufLeft on every pass; a pass that consumes nothing would spin
                         // here forever -- confirm the converter guarantees progress.
     854        8064 :   while (bufLeft) {
                           // restart at the beginning of the scratch buffer for each chunk
     855        2688 :     char* p = temp;
     856        2688 :     uint32_t tempLeft = sizeof(temp);
     857             : 
     858        2688 :     nsresult rv = conv.UnicodeToNative(&buf, &bufLeft, &p, &tempLeft);
     859        2688 :     if (NS_FAILED(rv)) {
     860           0 :       return rv;
     861             :     }
     862             : 
                           // sizeof(temp) - tempLeft is the number of bytes produced by
                           // this pass; skip the append when nothing was written.
     863        2688 :     if (tempLeft < sizeof(temp)) {
     864        2688 :       aOutput.Append(temp, sizeof(temp) - tempLeft);
     865             :     }
     866             :   }
     867        2688 :   return NS_OK;
     868             : }
     869             : 
     870             : bool
     871        2655 : NS_IsNativeUTF8()
     872             : {
                         // Thin public wrapper: forwards to whichever
                         // nsNativeCharsetConverter::IsNativeUTF8() implementation was
                         // compiled in.
     873        2655 :   return nsNativeCharsetConverter::IsNativeUTF8();
     874             : }
     875             : 
     876             : void
     877           3 : NS_StartupNativeCharsetUtils()
     878             : {
                         // Selects the LC_CTYPE locale from the environment and then runs the
                         // converter's one-time global initialization.
     879             :   //
     880             :   // need to initialize the locale or else charset conversion will fail.
     881             :   // better not delay this in case some other component alters the locale
     882             :   // settings.
     883             :   //
     884             :   // XXX we assume that we are called early enough that we should
     885             :   // always be the first to care about the locale's charset.
     886             :   //
                         // the empty string asks for the locale named by the environment; the
                         // return value of setlocale() is not checked.
     887           3 :   setlocale(LC_CTYPE, "");
     888             : 
     889           3 :   nsNativeCharsetConverter::GlobalInit();
     890           3 : }
     891             : 
     892             : void
     893           0 : NS_ShutdownNativeCharsetUtils()
     894             : {
                         // Counterpart of NS_StartupNativeCharsetUtils(): forwards to the
                         // converter's global shutdown hook.
     895           0 :   nsNativeCharsetConverter::GlobalShutdown();
     896           0 : }
     897             : 
     898             : //-----------------------------------------------------------------------------
     899             : // XP_WIN
     900             : //-----------------------------------------------------------------------------
     901             : #elif defined(XP_WIN)
     902             : 
     903             : #include <windows.h>
     904             : #include "nsString.h"
     905             : #include "nsAString.h"
     906             : #include "nsReadableUtils.h"
     907             : 
     908             : using namespace mozilla;
     909             : 
     910             : nsresult
     911             : NS_CopyNativeToUnicode(const nsACString& aInput, nsAString& aOutput)
     912             : {
                         // Windows implementation: converts aInput from the ANSI code page
                         // (CP_ACP) into aOutput using two MultiByteToWideChar() calls, the
                         // first to measure and the second to convert.  Returns
                         // NS_ERROR_OUT_OF_MEMORY if aOutput cannot be resized, otherwise
                         // NS_OK.
     913             :   uint32_t inputLen = aInput.Length();
     914             : 
     915             :   nsACString::const_iterator iter;
     916             :   aInput.BeginReading(iter);
     917             : 
     918             :   const char* buf = iter.get();
     919             : 
     920             :   // determine length of result
                         // a result of zero or less leaves resultLen at 0, so a failed
                         // measurement yields an empty string and NS_OK rather than an error.
     921             :   uint32_t resultLen = 0;
     922             :   int n = ::MultiByteToWideChar(CP_ACP, 0, buf, inputLen, nullptr, 0);
     923             :   if (n > 0) {
     924             :     resultLen += n;
     925             :   }
     926             : 
     927             :   // allocate sufficient space
     928             :   if (!aOutput.SetLength(resultLen, fallible)) {
     929             :     return NS_ERROR_OUT_OF_MEMORY;
     930             :   }
     931             :   if (resultLen > 0) {
     932             :     char16ptr_t result = aOutput.BeginWriting();
                           // the return value of the converting call is not inspected
     933             :     ::MultiByteToWideChar(CP_ACP, 0, buf, inputLen, result, resultLen);
     934             :   }
     935             :   return NS_OK;
     936             : }
     937             : 
     938             : nsresult
     939             : NS_CopyUnicodeToNative(const nsAString&  aInput, nsACString& aOutput)
     940             : {
                         // Windows implementation: converts aInput into the ANSI code page
                         // (CP_ACP) using two WideCharToMultiByte() calls, the first to
                         // measure and the second to convert.  Returns
                         // NS_ERROR_OUT_OF_MEMORY if aOutput cannot be resized, otherwise
                         // NS_OK.
     941             :   uint32_t inputLen = aInput.Length();
     942             : 
     943             :   nsAString::const_iterator iter;
     944             :   aInput.BeginReading(iter);
     945             : 
     946             :   char16ptr_t buf = iter.get();
     947             : 
     948             :   // determine length of result
                         // a result of zero or less leaves resultLen at 0, so a failed
                         // measurement yields an empty string and NS_OK rather than an error.
     949             :   uint32_t resultLen = 0;
     950             : 
     951             :   int n = ::WideCharToMultiByte(CP_ACP, 0, buf, inputLen, nullptr, 0,
     952             :                                 nullptr, nullptr);
     953             :   if (n > 0) {
     954             :     resultLen += n;
     955             :   }
     956             : 
     957             :   // allocate sufficient space
     958             :   if (!aOutput.SetLength(resultLen, fallible)) {
     959             :     return NS_ERROR_OUT_OF_MEMORY;
     960             :   }
     961             :   if (resultLen > 0) {
     962             :     nsACString::iterator out_iter;
     963             :     aOutput.BeginWriting(out_iter);
     964             : 
     965             :     // the default replacement character is '?', which is not allowed in
     966             :     // Windows file names and would make the file impossible to create, so
                           // '_' is substituted for unconvertible characters instead.
     967             :     const char defaultChar = '_';
     968             : 
     969             :     char* result = out_iter.get();
     970             : 
                           // the return value of the converting call is not inspected
     971             :     ::WideCharToMultiByte(CP_ACP, 0, buf, inputLen, result, resultLen,
     972             :                           &defaultChar, nullptr);
     973             :   }
     974             :   return NS_OK;
     975             : }
     976             : 
     977             : #else
     978             : 
     979             : #include "nsReadableUtils.h"
     980             : 
     981             : nsresult
     982             : NS_CopyNativeToUnicode(const nsACString& aInput, nsAString& aOutput)
     983             : {
                         // Last-resort implementation, compiled when none of the earlier
                         // platform branches apply: hands the input to CopyASCIItoUTF16()
                         // and always reports NS_OK.
     984             :   CopyASCIItoUTF16(aInput, aOutput);
     985             :   return NS_OK;
     986             : }
     987             : 
     988             : nsresult
     989             : NS_CopyUnicodeToNative(const nsAString& aInput, nsACString& aOutput)
     990             : {
                         // Last-resort implementation, compiled when none of the earlier
                         // platform branches apply: hands the input to
                         // LossyCopyUTF16toASCII() and always reports NS_OK.
     991             :   LossyCopyUTF16toASCII(aInput, aOutput);
     992             :   return NS_OK;
     993             : }
     994             : 
     995             : void
     996             : NS_StartupNativeCharsetUtils()
     997             : {
                         // Nothing to set up in the last-resort implementation.
     998             : }
     999             : 
    1000             : void
    1001             : NS_ShutdownNativeCharsetUtils()
    1002             : {
                         // Nothing to tear down in the last-resort implementation.
    1003             : }
    1004             : 
    1005             : #endif

Generated by: LCOV version 1.13