Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim:expandtab:shiftwidth=2:tabstop=4:
3 : */
4 : /* This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 : #include "nsString.h"
8 : #include "nsUTF8ConverterService.h"
9 : #include "nsEscape.h"
10 : #include "mozilla/Encoding.h"
11 :
12 : using namespace mozilla;
13 :
// Wires nsUTF8ConverterService up as an implementation of
// nsIUTF8ConverterService. NOTE(review): the macro is defined outside this
// file; presumably it expands to the reference-counting / interface-query
// boilerplate for the class -- confirm against its definition.
14 0 : NS_IMPL_ISUPPORTS(nsUTF8ConverterService, nsIUTF8ConverterService)
15 :
16 : static nsresult
17 0 : ToUTF8(const nsACString& aString,
18 : const char* aCharset,
19 : bool aAllowSubstitution,
20 : nsACString& aResult)
21 : {
22 0 : if (!aCharset || !*aCharset)
23 0 : return NS_ERROR_INVALID_ARG;
24 :
25 0 : auto encoding = Encoding::ForLabelNoReplacement(MakeStringSpan(aCharset));
26 0 : if (!encoding) {
27 0 : return NS_ERROR_UCONV_NOCONV;
28 : }
29 0 : if (aAllowSubstitution) {
30 0 : nsresult rv = encoding->DecodeWithoutBOMHandling(aString, aResult);
31 0 : if (NS_SUCCEEDED(rv)) {
32 0 : return NS_OK;
33 : }
34 0 : return rv;
35 : }
36 : return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aString,
37 0 : aResult);
38 : }
39 :
40 : NS_IMETHODIMP
41 0 : nsUTF8ConverterService::ConvertStringToUTF8(const nsACString& aString,
42 : const char* aCharset,
43 : bool aSkipCheck,
44 : bool aAllowSubstitution,
45 : uint8_t aOptionalArgc,
46 : nsACString& aUTF8String)
47 : {
48 0 : bool allowSubstitution = (aOptionalArgc == 1) ? aAllowSubstitution : true;
49 :
50 : // return if ASCII only or valid UTF-8 providing that the ASCII/UTF-8
51 : // check is requested. It may not be asked for if a caller suspects
52 : // that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or
53 : // it's in a charset other than UTF-8 that can be mistaken for UTF-8.
54 0 : if (!aSkipCheck && (IsASCII(aString) || IsUTF8(aString))) {
55 0 : aUTF8String = aString;
56 0 : return NS_OK;
57 : }
58 :
59 0 : aUTF8String.Truncate();
60 :
61 0 : nsresult rv = ToUTF8(aString, aCharset, allowSubstitution, aUTF8String);
62 :
63 : // additional protection for cases where check is skipped and the input
64 : // is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch
65 : // was wrong.) We don't check ASCIIness assuming there's no charset
66 : // incompatible with ASCII (we don't support EBCDIC).
67 0 : if (aSkipCheck && NS_FAILED(rv) && IsUTF8(aString)) {
68 0 : aUTF8String = aString;
69 0 : return NS_OK;
70 : }
71 :
72 0 : return rv;
73 : }
74 :
75 : NS_IMETHODIMP
76 0 : nsUTF8ConverterService::ConvertURISpecToUTF8(const nsACString& aSpec,
77 : const char* aCharset,
78 : nsACString& aUTF8Spec)
79 : {
80 : // assume UTF-8 if the spec contains unescaped non-ASCII characters.
81 : // No valid spec in Mozilla would break this assumption.
82 0 : if (!IsASCII(aSpec)) {
83 0 : aUTF8Spec = aSpec;
84 0 : return NS_OK;
85 : }
86 :
87 0 : aUTF8Spec.Truncate();
88 :
89 0 : nsAutoCString unescapedSpec;
90 : // NS_UnescapeURL does not fill up unescapedSpec unless there's at least
91 : // one character to unescape.
92 0 : bool written = NS_UnescapeURL(PromiseFlatCString(aSpec).get(),
93 0 : aSpec.Length(),
94 : esc_OnlyNonASCII,
95 0 : unescapedSpec);
96 :
97 0 : if (!written) {
98 0 : aUTF8Spec = aSpec;
99 0 : return NS_OK;
100 : }
101 : // return if ASCII only or escaped UTF-8
102 0 : if (IsASCII(unescapedSpec) || IsUTF8(unescapedSpec)) {
103 0 : aUTF8Spec = unescapedSpec;
104 0 : return NS_OK;
105 : }
106 :
107 0 : return ToUTF8(unescapedSpec, aCharset, true, aUTF8Spec);
108 : }
109 :
|