Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 : #include "nsString.h"
6 : #include "nsITextToSubURI.h"
7 : #include "nsEscape.h"
8 : #include "nsTextToSubURI.h"
9 : #include "nsCRT.h"
10 : #include "mozilla/Encoding.h"
11 : #include "mozilla/Preferences.h"
12 : #include "nsISupportsPrimitives.h"
13 :
14 : using namespace mozilla;
15 :
// Built-in list of UTF-16 code units used by UnEscapeURIForUI when the
// "network.IDN.blacklist_chars" preference yields no usable characters.
// It mirrors that preference's default value, with two deliberate omissions:
// U+0020 (SPACE) and U+3000, both of which UnEscapeURIForUI lets through
// unescaped. Entries are listed in ascending order.
static const char16_t sNetworkIDNBlacklistChars[] =
{
  // U+0020 intentionally omitted.

  // U+00A0 .. U+0338
  0x00A0, 0x00BC, 0x00BD, 0x00BE, 0x01C3, 0x02D0, 0x0337, 0x0338,

  // U+0589 .. U+0704
  0x0589, 0x058A, 0x05C3, 0x05F4, 0x0609, 0x060A, 0x066A, 0x06D4,
  0x0701, 0x0702, 0x0703, 0x0704,

  // U+115F .. U+1735
  0x115F, 0x1160, 0x1735,

  // U+2000 .. U+205F
  0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007,
  0x2008, 0x2009, 0x200A, 0x200B, 0x200E, 0x200F, 0x2010, 0x2019,
  0x2024, 0x2027, 0x2028, 0x2029, 0x202A, 0x202B, 0x202C, 0x202D,
  0x202E, 0x202F, 0x2039, 0x203A, 0x2041, 0x2044, 0x2052, 0x205F,

  // U+2153 .. U+215F
  0x2153, 0x2154, 0x2155, 0x2156, 0x2157, 0x2158, 0x2159, 0x215A,
  0x215B, 0x215C, 0x215D, 0x215E, 0x215F,

  // U+2215 .. U+2AFD
  0x2215, 0x2236, 0x23AE, 0x2571, 0x29F6, 0x29F8, 0x2AFB, 0x2AFD,

  // U+2FF0 .. U+2FFB
  0x2FF0, 0x2FF1, 0x2FF2, 0x2FF3, 0x2FF4, 0x2FF5, 0x2FF6, 0x2FF7,
  0x2FF8, 0x2FF9, 0x2FFA, 0x2FFB,

  // U+3000 intentionally omitted.

  // U+3002 .. U+33DF
  0x3002, 0x3014, 0x3015, 0x3033, 0x30A0, 0x3164, 0x321D, 0x321E,
  0x33AE, 0x33AF, 0x33C6, 0x33DF,

  // U+A789 .. U+FFFD
  0xA789, 0xFE14, 0xFE15, 0xFE3F, 0xFE5D, 0xFE5E, 0xFEFF, 0xFF0E,
  0xFF0F, 0xFF61, 0xFFA0, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD
};
38 :
39 0 : nsTextToSubURI::~nsTextToSubURI()
40 : {
41 0 : }
42 :
43 26 : NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI)
44 :
45 : NS_IMETHODIMP
46 0 : nsTextToSubURI::ConvertAndEscape(const nsACString& aCharset,
47 : const nsAString& aText,
48 : nsACString& aOut)
49 : {
50 0 : auto encoding = Encoding::ForLabelNoReplacement(aCharset);
51 0 : if (!encoding) {
52 0 : aOut.Truncate();
53 0 : return NS_ERROR_UCONV_NOCONV;
54 : }
55 : nsresult rv;
56 : const Encoding* actualEncoding;
57 0 : nsAutoCString intermediate;
58 0 : Tie(rv, actualEncoding) = encoding->Encode(aText, intermediate);
59 : Unused << actualEncoding;
60 0 : if (NS_FAILED(rv)) {
61 0 : aOut.Truncate();
62 0 : return rv;
63 : }
64 0 : bool ok = NS_Escape(intermediate, aOut, url_XPAlphas);
65 0 : if (!ok) {
66 0 : aOut.Truncate();
67 0 : return NS_ERROR_OUT_OF_MEMORY;
68 : }
69 0 : return NS_OK;
70 : }
71 :
72 : NS_IMETHODIMP
73 0 : nsTextToSubURI::UnEscapeAndConvert(const nsACString& aCharset,
74 : const nsACString& aText,
75 : nsAString& aOut)
76 : {
77 0 : auto encoding = Encoding::ForLabelNoReplacement(aCharset);
78 0 : if (!encoding) {
79 0 : aOut.Truncate();
80 0 : return NS_ERROR_UCONV_NOCONV;
81 : }
82 0 : nsAutoCString unescaped(aText);
83 0 : NS_UnescapeURL(unescaped);
84 0 : auto rv = encoding->DecodeWithoutBOMHandling(unescaped, aOut);
85 0 : if (NS_SUCCEEDED(rv)) {
86 0 : return NS_OK;
87 : }
88 0 : return rv;
89 : }
90 :
91 2 : static bool statefulCharset(const char *charset)
92 : {
93 : // HZ, UTF-7 and the CN and KR ISO-2022 variants are no longer in
94 : // mozilla-central but keeping them here just in case for the benefit of
95 : // comm-central.
96 6 : if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) ||
97 4 : !nsCRT::strcasecmp(charset, "UTF-7") ||
98 2 : !nsCRT::strcasecmp(charset, "HZ-GB-2312"))
99 0 : return true;
100 :
101 2 : return false;
102 : }
103 :
104 : nsresult
105 2 : nsTextToSubURI::convertURItoUnicode(const nsCString& aCharset,
106 : const nsCString& aURI,
107 : nsAString& aOut)
108 : {
109 : // check for 7bit encoding the data may not be ASCII after we decode
110 2 : bool isStatefulCharset = statefulCharset(aCharset.get());
111 :
112 2 : if (!isStatefulCharset) {
113 2 : if (IsASCII(aURI)) {
114 2 : CopyASCIItoUTF16(aURI, aOut);
115 2 : return NS_OK;
116 : }
117 0 : if (IsUTF8(aURI)) {
118 0 : CopyUTF8toUTF16(aURI, aOut);
119 0 : return NS_OK;
120 : }
121 : }
122 :
123 : // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
124 0 : NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);
125 :
126 0 : auto encoding = Encoding::ForLabelNoReplacement(aCharset);
127 0 : if (!encoding) {
128 0 : aOut.Truncate();
129 0 : return NS_ERROR_UCONV_NOCONV;
130 : }
131 0 : return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aURI, aOut);
132 : }
133 :
134 1 : NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset,
135 : const nsACString &aURIFragment,
136 : nsAString &_retval)
137 : {
138 2 : nsAutoCString unescapedSpec;
139 : // skip control octets (0x00 - 0x1f and 0x7f) when unescaping
140 2 : NS_UnescapeURL(PromiseFlatCString(aURIFragment),
141 1 : esc_SkipControl | esc_AlwaysCopy, unescapedSpec);
142 :
143 : // in case of failure, return escaped URI
144 : // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte
145 : // sequences are also considered failure in this context
146 2 : if (convertURItoUnicode(
147 2 : PromiseFlatCString(aCharset), unescapedSpec, _retval)
148 : != NS_OK) {
149 : // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8
150 0 : CopyUTF8toUTF16(aURIFragment, _retval);
151 : }
152 :
153 : // If there are any characters that are unsafe for URIs, reescape those.
154 1 : if (mUnsafeChars.IsEmpty()) {
155 2 : nsAdoptingString blacklist;
156 : nsresult rv = mozilla::Preferences::GetString("network.IDN.blacklist_chars",
157 1 : &blacklist);
158 1 : if (NS_SUCCEEDED(rv)) {
159 : // we allow SPACE and IDEOGRAPHIC SPACE in this method
160 1 : blacklist.StripChars(u" \u3000");
161 1 : mUnsafeChars.AppendElements(static_cast<const char16_t*>(blacklist.Data()),
162 2 : blacklist.Length());
163 : } else {
164 0 : NS_WARNING("Failed to get the 'network.IDN.blacklist_chars' preference");
165 : }
166 : // We check IsEmpty() intentionally here because an empty (or just spaces)
167 : // pref value is likely a mistake/error of some sort.
168 1 : if (mUnsafeChars.IsEmpty()) {
169 0 : mUnsafeChars.AppendElements(sNetworkIDNBlacklistChars,
170 0 : mozilla::ArrayLength(sNetworkIDNBlacklistChars));
171 : }
172 1 : mUnsafeChars.Sort();
173 : }
174 2 : const nsPromiseFlatString& unescapedResult = PromiseFlatString(_retval);
175 2 : nsString reescapedSpec;
176 1 : _retval = NS_EscapeURL(unescapedResult, mUnsafeChars, reescapedSpec);
177 :
178 2 : return NS_OK;
179 : }
180 :
181 : NS_IMETHODIMP
182 1 : nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString& aCharset,
183 : const nsACString& aURIFragment,
184 : nsAString& _retval)
185 : {
186 2 : nsAutoCString unescapedSpec;
187 2 : NS_UnescapeURL(PromiseFlatCString(aURIFragment),
188 1 : esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec);
189 : // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII
190 : // superset since converting "http:" with such an encoding is always a bad
191 : // idea.
192 1 : if (!IsUTF8(unescapedSpec) &&
193 0 : (aCharset.LowerCaseEqualsLiteral("utf-16") ||
194 0 : aCharset.LowerCaseEqualsLiteral("utf-16be") ||
195 0 : aCharset.LowerCaseEqualsLiteral("utf-16le") ||
196 0 : aCharset.LowerCaseEqualsLiteral("utf-7") ||
197 0 : aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))){
198 0 : CopyASCIItoUTF16(aURIFragment, _retval);
199 0 : return NS_OK;
200 : }
201 :
202 2 : nsresult rv = convertURItoUnicode(PromiseFlatCString(aCharset),
203 1 : unescapedSpec, _retval);
204 : // NS_OK_UDEC_MOREINPUT is a success code, so caller can't catch the error
205 : // if the string ends with a valid (but incomplete) sequence.
206 1 : return rv == NS_OK_UDEC_MOREINPUT ? NS_ERROR_UDEC_ILLEGALINPUT : rv;
207 : }
208 :
209 : //----------------------------------------------------------------------
|