Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 : #include "nsIDNService.h"
7 : #include "nsReadableUtils.h"
8 : #include "nsCRT.h"
9 : #include "nsUnicharUtils.h"
10 : #include "nsUnicodeProperties.h"
11 : #include "nsUnicodeScriptCodes.h"
12 : #include "harfbuzz/hb.h"
13 : #include "nsIServiceManager.h"
14 : #include "nsIPrefService.h"
15 : #include "nsIPrefBranch.h"
16 : #include "nsIObserverService.h"
17 : #include "nsISupportsPrimitives.h"
18 : #include "punycode.h"
19 :
20 : #ifdef IDNA2008
21 : // Currently we use the non-transitional processing option -- see
22 : // http://unicode.org/reports/tr46/
23 : // To switch to transitional processing, change the value of this flag
24 : // and kTransitionalProcessing in netwerk/test/unit/test_idna2008.js to true
25 : // (revert bug 1218179).
26 : const bool kIDNA2008_TransitionalProcessing = false;
27 :
28 : #include "ICUUtils.h"
29 : #endif
30 :
31 : using namespace mozilla::unicode;
32 :
33 : //-----------------------------------------------------------------------------
// RFC 1034 - 3.1. Name space specifications and terminology
// Upper bound, in characters, applied to a single label ("node") of a host
// name by the conversion routines in this file.
static const uint32_t kMaxDNSNodeLen = 63;
// RFC 3490 - 5. ACE prefix
// Prefix that marks a label as ACE (punycode) encoded.
static const char kACEPrefix[] = "xn--";
// Number of characters in kACEPrefix (excluding the terminating NUL); keep
// the two in sync.
#define kACEPrefixLen 4
39 :
40 : //-----------------------------------------------------------------------------
41 :
// Preference names consulted by nsIDNService.
// String pref: characters that make a label unsafe for display.
#define NS_NET_PREF_IDNBLACKLIST    "network.IDN.blacklist_chars"
// Bool pref: when true, hosts are displayed in their ACE form.
#define NS_NET_PREF_SHOWPUNYCODE    "network.IDN_show_punycode"
// Root of the pref branch holding one bool pref per whitelisted TLD.
#define NS_NET_PREF_IDNWHITELIST    "network.IDN.whitelist."
// Bool pref: enables the TLD whitelist lookup in isInWhitelist().
#define NS_NET_PREF_IDNUSEWHITELIST "network.IDN.use_whitelist"
// Char pref: "moderate", "high", or anything else for ASCII-only.
#define NS_NET_PREF_IDNRESTRICTION  "network.IDN.restriction_profile"
47 :
48 0 : inline bool isOnlySafeChars(const nsString& in, const nsString& blacklist)
49 : {
50 0 : return (blacklist.IsEmpty() ||
51 0 : in.FindCharInSet(blacklist) == kNotFound);
52 : }
53 :
54 : //-----------------------------------------------------------------------------
55 : // nsIDNService
56 : //-----------------------------------------------------------------------------
57 :
58 : /* Implementation file */
/* Implementation file */
// XPCOM boilerplate: nsIDNService exposes the IDN service interface, acts as
// an observer (see Observe()), and supports weak references.
NS_IMPL_ISUPPORTS(nsIDNService,
                  nsIIDNService,
                  nsIObserver,
                  nsISupportsWeakReference)
63 :
64 3 : nsresult nsIDNService::Init()
65 : {
66 3 : MOZ_ASSERT(NS_IsMainThread());
67 6 : MutexAutoLock lock(mLock);
68 :
69 6 : nsCOMPtr<nsIPrefService> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID));
70 3 : if (prefs)
71 3 : prefs->GetBranch(NS_NET_PREF_IDNWHITELIST, getter_AddRefs(mIDNWhitelistPrefBranch));
72 :
73 6 : nsCOMPtr<nsIPrefBranch> prefInternal(do_QueryInterface(prefs));
74 3 : if (prefInternal) {
75 3 : prefInternal->AddObserver(NS_NET_PREF_IDNBLACKLIST, this, true);
76 3 : prefInternal->AddObserver(NS_NET_PREF_SHOWPUNYCODE, this, true);
77 3 : prefInternal->AddObserver(NS_NET_PREF_IDNRESTRICTION, this, true);
78 3 : prefInternal->AddObserver(NS_NET_PREF_IDNUSEWHITELIST, this, true);
79 3 : prefsChanged(prefInternal, nullptr);
80 : }
81 :
82 6 : return NS_OK;
83 : }
84 :
85 0 : NS_IMETHODIMP nsIDNService::Observe(nsISupports *aSubject,
86 : const char *aTopic,
87 : const char16_t *aData)
88 : {
89 0 : MOZ_ASSERT(NS_IsMainThread());
90 0 : MutexAutoLock lock(mLock);
91 :
92 0 : if (!strcmp(aTopic, NS_PREFBRANCH_PREFCHANGE_TOPIC_ID)) {
93 0 : nsCOMPtr<nsIPrefBranch> prefBranch( do_QueryInterface(aSubject) );
94 0 : if (prefBranch)
95 0 : prefsChanged(prefBranch, aData);
96 : }
97 0 : return NS_OK;
98 : }
99 :
100 3 : void nsIDNService::prefsChanged(nsIPrefBranch *prefBranch, const char16_t *pref)
101 : {
102 3 : MOZ_ASSERT(NS_IsMainThread());
103 3 : mLock.AssertCurrentThreadOwns();
104 :
105 3 : if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNBLACKLIST).Equals(pref)) {
106 6 : nsCOMPtr<nsISupportsString> blacklist;
107 3 : nsresult rv = prefBranch->GetComplexValue(NS_NET_PREF_IDNBLACKLIST,
108 : NS_GET_IID(nsISupportsString),
109 6 : getter_AddRefs(blacklist));
110 3 : if (NS_SUCCEEDED(rv))
111 3 : blacklist->ToString(getter_Copies(mIDNBlacklist));
112 : else
113 0 : mIDNBlacklist.Truncate();
114 : }
115 3 : if (!pref || NS_LITERAL_STRING(NS_NET_PREF_SHOWPUNYCODE).Equals(pref)) {
116 : bool val;
117 3 : if (NS_SUCCEEDED(prefBranch->GetBoolPref(NS_NET_PREF_SHOWPUNYCODE, &val)))
118 3 : mShowPunycode = val;
119 : }
120 3 : if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNUSEWHITELIST).Equals(pref)) {
121 : bool val;
122 3 : if (NS_SUCCEEDED(prefBranch->GetBoolPref(NS_NET_PREF_IDNUSEWHITELIST,
123 : &val)))
124 3 : mIDNUseWhitelist = val;
125 : }
126 3 : if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNRESTRICTION).Equals(pref)) {
127 6 : nsXPIDLCString profile;
128 3 : if (NS_FAILED(prefBranch->GetCharPref(NS_NET_PREF_IDNRESTRICTION,
129 : getter_Copies(profile)))) {
130 0 : profile.Truncate();
131 : }
132 3 : if (profile.EqualsLiteral("moderate")) {
133 3 : mRestrictionProfile = eModeratelyRestrictiveProfile;
134 0 : } else if (profile.EqualsLiteral("high")) {
135 0 : mRestrictionProfile = eHighlyRestrictiveProfile;
136 : } else {
137 0 : mRestrictionProfile = eASCIIOnlyProfile;
138 : }
139 : }
140 3 : }
141 :
142 3 : nsIDNService::nsIDNService()
143 : : mLock("DNService pref value lock")
144 : , mShowPunycode(false)
145 3 : , mIDNUseWhitelist(false)
146 : {
147 3 : MOZ_ASSERT(NS_IsMainThread());
148 :
149 : #ifdef IDNA2008
150 3 : uint32_t IDNAOptions = UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ;
151 : if (!kIDNA2008_TransitionalProcessing) {
152 3 : IDNAOptions |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
153 : }
154 3 : UErrorCode errorCode = U_ZERO_ERROR;
155 3 : mIDNA = uidna_openUTS46(IDNAOptions, &errorCode);
156 : #else
157 : if (idn_success != idn_nameprep_create(nullptr, &mNamePrepHandle))
158 : mNamePrepHandle = nullptr;
159 :
160 : mNormalizer = do_GetService(NS_UNICODE_NORMALIZER_CONTRACTID);
161 : /* member initializers and constructor code */
162 : #endif
163 3 : }
164 :
// Releases the IDNA backend created by the constructor. Main-thread only.
nsIDNService::~nsIDNService()
{
  MOZ_ASSERT(NS_IsMainThread());

#ifdef IDNA2008
  // Counterpart of uidna_openUTS46() in the constructor.
  uidna_close(mIDNA);
#else
  // Counterpart of idn_nameprep_create() in the constructor.
  idn_nameprep_destroy(mNamePrepHandle);
#endif
}
175 :
176 : #ifdef IDNA2008
177 : nsresult
178 0 : nsIDNService::IDNA2008ToUnicode(const nsACString& input, nsAString& output)
179 : {
180 0 : NS_ConvertUTF8toUTF16 inputStr(input);
181 0 : UIDNAInfo info = UIDNA_INFO_INITIALIZER;
182 0 : UErrorCode errorCode = U_ZERO_ERROR;
183 0 : int32_t inLen = inputStr.Length();
184 0 : int32_t outMaxLen = kMaxDNSNodeLen + 1;
185 : UChar outputBuffer[kMaxDNSNodeLen + 1];
186 :
187 0 : int32_t outLen = uidna_labelToUnicode(mIDNA, (const UChar*)inputStr.get(),
188 : inLen, outputBuffer, outMaxLen,
189 0 : &info, &errorCode);
190 0 : if (info.errors != 0) {
191 0 : return NS_ERROR_MALFORMED_URI;
192 : }
193 :
194 0 : if (U_SUCCESS(errorCode)) {
195 0 : ICUUtils::AssignUCharArrayToString(outputBuffer, outLen, output);
196 : }
197 :
198 0 : nsresult rv = ICUUtils::UErrorToNsResult(errorCode);
199 0 : if (rv == NS_ERROR_FAILURE) {
200 0 : rv = NS_ERROR_MALFORMED_URI;
201 : }
202 0 : return rv;
203 : }
204 :
205 : nsresult
206 0 : nsIDNService::IDNA2008StringPrep(const nsAString& input,
207 : nsAString& output,
208 : stringPrepFlag flag)
209 : {
210 0 : UIDNAInfo info = UIDNA_INFO_INITIALIZER;
211 0 : UErrorCode errorCode = U_ZERO_ERROR;
212 0 : int32_t inLen = input.Length();
213 0 : int32_t outMaxLen = kMaxDNSNodeLen + 1;
214 : UChar outputBuffer[kMaxDNSNodeLen + 1];
215 :
216 : int32_t outLen =
217 0 : uidna_labelToUnicode(mIDNA, (const UChar*)PromiseFlatString(input).get(),
218 0 : inLen, outputBuffer, outMaxLen, &info, &errorCode);
219 0 : nsresult rv = ICUUtils::UErrorToNsResult(errorCode);
220 0 : if (rv == NS_ERROR_FAILURE) {
221 0 : rv = NS_ERROR_MALFORMED_URI;
222 : }
223 0 : NS_ENSURE_SUCCESS(rv, rv);
224 :
225 : // Output the result of nameToUnicode even if there were errors
226 0 : ICUUtils::AssignUCharArrayToString(outputBuffer, outLen, output);
227 :
228 0 : if (flag == eStringPrepIgnoreErrors) {
229 0 : return NS_OK;
230 : }
231 :
232 0 : if (info.errors != 0) {
233 0 : if (flag == eStringPrepForDNS) {
234 0 : output.Truncate();
235 : }
236 0 : rv = NS_ERROR_MALFORMED_URI;
237 : }
238 :
239 0 : return rv;
240 : }
241 : #endif
242 :
// Public wrapper around UTF8toACE() using the strict eStringPrepForDNS
// policy. |ace| receives the converted host; the result code is whatever
// UTF8toACE() returns.
NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString & input, nsACString & ace)
{
  return UTF8toACE(input, ace, eStringPrepForDNS);
}
247 :
248 0 : nsresult nsIDNService::UTF8toACE(const nsACString & input, nsACString & ace,
249 : stringPrepFlag flag)
250 : {
251 : nsresult rv;
252 0 : NS_ConvertUTF8toUTF16 ustr(input);
253 :
254 : // map ideographic period to ASCII period etc.
255 0 : normalizeFullStops(ustr);
256 :
257 : uint32_t len, offset;
258 0 : len = 0;
259 0 : offset = 0;
260 0 : nsAutoCString encodedBuf;
261 :
262 0 : nsAString::const_iterator start, end;
263 0 : ustr.BeginReading(start);
264 0 : ustr.EndReading(end);
265 0 : ace.Truncate();
266 :
267 : // encode nodes if non ASCII
268 0 : while (start != end) {
269 0 : len++;
270 0 : if (*start++ == (char16_t)'.') {
271 0 : rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf, flag);
272 0 : NS_ENSURE_SUCCESS(rv, rv);
273 :
274 0 : ace.Append(encodedBuf);
275 0 : ace.Append('.');
276 0 : offset += len;
277 0 : len = 0;
278 : }
279 : }
280 :
281 : // encode the last node if non ASCII
282 0 : if (len) {
283 0 : rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf, flag);
284 0 : NS_ENSURE_SUCCESS(rv, rv);
285 :
286 0 : ace.Append(encodedBuf);
287 : }
288 :
289 0 : return NS_OK;
290 : }
291 :
// Public wrapper around ACEtoUTF8() using the eStringPrepForDNS policy.
// |_retval| receives the decoded host; the result code is whatever
// ACEtoUTF8() returns.
NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString & input, nsACString & _retval)
{
  return ACEtoUTF8(input, _retval, eStringPrepForDNS);
}
296 :
297 134 : nsresult nsIDNService::ACEtoUTF8(const nsACString & input, nsACString & _retval,
298 : stringPrepFlag flag)
299 : {
300 : // RFC 3490 - 4.2 ToUnicode
301 : // ToUnicode never fails. If any step fails, then the original input
302 : // sequence is returned immediately in that step.
303 :
304 134 : uint32_t len = 0, offset = 0;
305 268 : nsAutoCString decodedBuf;
306 :
307 134 : nsACString::const_iterator start, end;
308 134 : input.BeginReading(start);
309 134 : input.EndReading(end);
310 134 : _retval.Truncate();
311 :
312 : // loop and decode nodes
313 1924 : while (start != end) {
314 895 : len++;
315 895 : if (*start++ == '.') {
316 0 : if (NS_FAILED(decodeACE(Substring(input, offset, len - 1), decodedBuf,
317 : flag))) {
318 0 : _retval.Assign(input);
319 0 : return NS_OK;
320 : }
321 :
322 0 : _retval.Append(decodedBuf);
323 0 : _retval.Append('.');
324 0 : offset += len;
325 0 : len = 0;
326 : }
327 : }
328 : // decode the last node
329 134 : if (len) {
330 134 : if (NS_FAILED(decodeACE(Substring(input, offset, len), decodedBuf,
331 : flag)))
332 0 : _retval.Assign(input);
333 : else
334 134 : _retval.Append(decodedBuf);
335 : }
336 :
337 134 : return NS_OK;
338 : }
339 :
340 1727 : NS_IMETHODIMP nsIDNService::IsACE(const nsACString & input, bool *_retval)
341 : {
342 1727 : const char *data = input.BeginReading();
343 1727 : uint32_t dataLen = input.Length();
344 :
345 : // look for the ACE prefix in the input string. it may occur
346 : // at the beginning of any segment in the domain name. for
347 : // example: "www.xn--ENCODED.com"
348 :
349 1727 : const char *p = PL_strncasestr(data, kACEPrefix, dataLen);
350 :
351 1727 : *_retval = p && (p == data || *(p - 1) == '.');
352 1727 : return NS_OK;
353 : }
354 :
355 0 : NS_IMETHODIMP nsIDNService::Normalize(const nsACString & input,
356 : nsACString & output)
357 : {
358 : // protect against bogus input
359 0 : NS_ENSURE_TRUE(IsUTF8(input), NS_ERROR_UNEXPECTED);
360 :
361 0 : NS_ConvertUTF8toUTF16 inUTF16(input);
362 0 : normalizeFullStops(inUTF16);
363 :
364 : // pass the domain name to stringprep label by label
365 0 : nsAutoString outUTF16, outLabel;
366 :
367 0 : uint32_t len = 0, offset = 0;
368 : nsresult rv;
369 0 : nsAString::const_iterator start, end;
370 0 : inUTF16.BeginReading(start);
371 0 : inUTF16.EndReading(end);
372 :
373 0 : while (start != end) {
374 0 : len++;
375 0 : if (*start++ == char16_t('.')) {
376 0 : rv = stringPrep(Substring(inUTF16, offset, len - 1), outLabel,
377 0 : eStringPrepIgnoreErrors);
378 0 : NS_ENSURE_SUCCESS(rv, rv);
379 :
380 0 : outUTF16.Append(outLabel);
381 0 : outUTF16.Append(char16_t('.'));
382 0 : offset += len;
383 0 : len = 0;
384 : }
385 : }
386 0 : if (len) {
387 0 : rv = stringPrep(Substring(inUTF16, offset, len), outLabel,
388 0 : eStringPrepIgnoreErrors);
389 0 : NS_ENSURE_SUCCESS(rv, rv);
390 :
391 0 : outUTF16.Append(outLabel);
392 : }
393 :
394 0 : CopyUTF16toUTF8(outUTF16, output);
395 0 : return NS_OK;
396 : }
397 :
398 : namespace {
399 :
400 : class MOZ_STACK_CLASS MutexSettableAutoUnlock final
401 : {
402 : Mutex* mMutex;
403 : public:
404 1593 : MutexSettableAutoUnlock()
405 1593 : : mMutex(nullptr)
406 1593 : { }
407 :
408 : void
409 0 : Acquire(mozilla::Mutex& aMutex)
410 : {
411 0 : MOZ_ASSERT(!mMutex);
412 0 : mMutex = &aMutex;
413 0 : mMutex->Lock();
414 0 : }
415 :
416 1593 : ~MutexSettableAutoUnlock()
417 1593 : {
418 1593 : if (mMutex) {
419 0 : mMutex->Unlock();
420 : }
421 1593 : }
422 : };
423 :
424 : } // anonymous namespace
425 :
426 1593 : NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString & input, bool * _isASCII, nsACString & _retval)
427 : {
428 3186 : MutexSettableAutoUnlock lock;
429 1593 : if (!NS_IsMainThread()) {
430 0 : lock.Acquire(mLock);
431 : }
432 :
433 : // If host is ACE, then convert to UTF-8 if the host is in the IDN whitelist.
434 : // Else, if host is already UTF-8, then make sure it is normalized per IDN.
435 :
436 1593 : nsresult rv = NS_OK;
437 :
438 : // Even if the hostname is not ASCII, individual labels may still be ACE, so
439 : // test IsACE before testing IsASCII
440 : bool isACE;
441 1593 : IsACE(input, &isACE);
442 :
443 1593 : if (IsASCII(input)) {
444 : // first, canonicalize the host to lowercase, for whitelist lookup
445 1593 : _retval = input;
446 1593 : ToLowerCase(_retval);
447 :
448 1593 : if (isACE && !mShowPunycode) {
449 : // ACEtoUTF8() can't fail, but might return the original ACE string
450 0 : nsAutoCString temp(_retval);
451 : // If the domain is in the whitelist, return the host in UTF-8.
452 : // Otherwise convert from ACE to UTF8 only those labels which are
453 : // considered safe for display
454 0 : ACEtoUTF8(temp, _retval, isInWhitelist(temp) ?
455 0 : eStringPrepIgnoreErrors : eStringPrepForUI);
456 0 : *_isASCII = IsASCII(_retval);
457 : } else {
458 1593 : *_isASCII = true;
459 : }
460 : } else {
461 : // We have to normalize the hostname before testing against the domain
462 : // whitelist (see bug 315411), and to ensure the entire string gets
463 : // normalized.
464 : //
465 : // Normalization and the tests for safe display below, assume that the
466 : // input is Unicode, so first convert any ACE labels to UTF8
467 0 : if (isACE) {
468 0 : nsAutoCString temp;
469 0 : ACEtoUTF8(input, temp, eStringPrepIgnoreErrors);
470 0 : rv = Normalize(temp, _retval);
471 : } else {
472 0 : rv = Normalize(input, _retval);
473 : }
474 0 : if (NS_FAILED(rv)) return rv;
475 :
476 0 : if (mShowPunycode && NS_SUCCEEDED(UTF8toACE(_retval, _retval,
477 : eStringPrepIgnoreErrors))) {
478 0 : *_isASCII = true;
479 0 : return NS_OK;
480 : }
481 :
482 : // normalization could result in an ASCII-only hostname. alternatively, if
483 : // the host is converted to ACE by the normalizer, then the host may contain
484 : // unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694, and bug 309311.
485 0 : *_isASCII = IsASCII(_retval);
486 0 : if (!*_isASCII && !isInWhitelist(_retval)) {
487 : // UTF8toACE with eStringPrepForUI may return a domain name where
488 : // some labels are in UTF-8 and some are in ACE, depending on
489 : // whether they are considered safe for display
490 0 : rv = UTF8toACE(_retval, _retval, eStringPrepForUI);
491 0 : *_isASCII = IsASCII(_retval);
492 0 : return rv;
493 : }
494 : }
495 :
496 1593 : return NS_OK;
497 : }
498 :
499 : //-----------------------------------------------------------------------------
500 :
501 0 : static nsresult utf16ToUcs4(const nsAString& in,
502 : uint32_t *out,
503 : uint32_t outBufLen,
504 : uint32_t *outLen)
505 : {
506 0 : uint32_t i = 0;
507 0 : nsAString::const_iterator start, end;
508 0 : in.BeginReading(start);
509 0 : in.EndReading(end);
510 :
511 0 : while (start != end) {
512 : char16_t curChar;
513 :
514 0 : curChar= *start++;
515 :
516 0 : if (start != end &&
517 0 : NS_IS_HIGH_SURROGATE(curChar) &&
518 0 : NS_IS_LOW_SURROGATE(*start)) {
519 0 : out[i] = SURROGATE_TO_UCS4(curChar, *start);
520 0 : ++start;
521 : }
522 : else
523 0 : out[i] = curChar;
524 :
525 0 : i++;
526 0 : if (i >= outBufLen)
527 0 : return NS_ERROR_MALFORMED_URI;
528 : }
529 0 : out[i] = (uint32_t)'\0';
530 0 : *outLen = i;
531 0 : return NS_OK;
532 : }
533 :
534 : #ifndef IDNA2008
535 : static void ucs4toUtf16(const uint32_t *in, nsAString& out)
536 : {
537 : while (*in) {
538 : if (!IS_IN_BMP(*in)) {
539 : out.Append((char16_t) H_SURROGATE(*in));
540 : out.Append((char16_t) L_SURROGATE(*in));
541 : }
542 : else
543 : out.Append((char16_t) *in);
544 : in++;
545 : }
546 : }
547 : #endif
548 :
549 0 : static nsresult punycode(const nsAString& in, nsACString& out)
550 : {
551 : uint32_t ucs4Buf[kMaxDNSNodeLen + 1];
552 0 : uint32_t ucs4Len = 0u;
553 0 : nsresult rv = utf16ToUcs4(in, ucs4Buf, kMaxDNSNodeLen, &ucs4Len);
554 0 : NS_ENSURE_SUCCESS(rv, rv);
555 :
556 : // need maximum 20 bits to encode 16 bit Unicode character
557 : // (include null terminator)
558 0 : const uint32_t kEncodedBufSize = kMaxDNSNodeLen * 20 / 8 + 1 + 1;
559 : char encodedBuf[kEncodedBufSize];
560 0 : punycode_uint encodedLength = kEncodedBufSize;
561 :
562 0 : enum punycode_status status = punycode_encode(ucs4Len,
563 : ucs4Buf,
564 : nullptr,
565 : &encodedLength,
566 0 : encodedBuf);
567 :
568 0 : if (punycode_success != status ||
569 0 : encodedLength >= kEncodedBufSize)
570 0 : return NS_ERROR_MALFORMED_URI;
571 :
572 0 : encodedBuf[encodedLength] = '\0';
573 0 : out.Assign(nsDependentCString(kACEPrefix) + nsDependentCString(encodedBuf));
574 :
575 0 : return rv;
576 : }
577 :
578 : // RFC 3454
579 : //
580 : // 1) Map -- For each character in the input, check if it has a mapping
581 : // and, if so, replace it with its mapping. This is described in section 3.
582 : //
583 : // 2) Normalize -- Possibly normalize the result of step 1 using Unicode
584 : // normalization. This is described in section 4.
585 : //
586 : // 3) Prohibit -- Check for any characters that are not allowed in the
587 : // output. If any are found, return an error. This is described in section
588 : // 5.
589 : //
590 : // 4) Check bidi -- Possibly check for right-to-left characters, and if any
591 : // are found, make sure that the whole string satisfies the requirements
592 : // for bidirectional strings. If the string does not satisfy the requirements
593 : // for bidirectional strings, return an error. This is described in section 6.
594 : //
595 : // 5) Check unassigned code points -- If allowUnassigned is false, check for
596 : // any unassigned Unicode points and if any are found return an error.
597 : // This is described in section 7.
598 : //
// Prepares a single label |in| and stores the result in |out|.
//
// With IDNA2008 defined this simply forwards to IDNA2008StringPrep(). The
// legacy path implements the RFC 3454 steps listed above with the nameprep
// handle and the Unicode normalizer created in the constructor.
//
// |flag| selects how validation failures are treated in the legacy path:
//   eStringPrepIgnoreErrors - return right after map + normalize.
//   eStringPrepForDNS       - on any validation failure |out| is cleared.
//   eStringPrepForUI        - additionally rejects unassigned code points.
nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out,
                                  stringPrepFlag flag)
{
#ifdef IDNA2008
  return IDNA2008StringPrep(in, out, flag);
#else
  // Both helpers are set up in the constructor and may be missing.
  if (!mNamePrepHandle || !mNormalizer)
    return NS_ERROR_FAILURE;

  uint32_t ucs4Buf[kMaxDNSNodeLen + 1];
  uint32_t ucs4Len;
  nsresult rv = utf16ToUcs4(in, ucs4Buf, kMaxDNSNodeLen, &ucs4Len);
  NS_ENSURE_SUCCESS(rv, rv);

  // map
  idn_result_t idn_err;

  uint32_t namePrepBuf[kMaxDNSNodeLen * 3];   // map up to three characters
  idn_err = idn_nameprep_map(mNamePrepHandle, (const uint32_t *) ucs4Buf,
                             (uint32_t *) namePrepBuf, kMaxDNSNodeLen * 3);
  NS_ENSURE_TRUE(idn_err == idn_success, NS_ERROR_MALFORMED_URI);

  nsAutoString namePrepStr;
  ucs4toUtf16(namePrepBuf, namePrepStr);
  if (namePrepStr.Length() >= kMaxDNSNodeLen)
    return NS_ERROR_MALFORMED_URI;

  // normalize
  // Note: rv now carries the normalizer's result; it is what gets returned
  // at the end when none of the checks below overrides it.
  nsAutoString normlizedStr;
  rv = mNormalizer->NormalizeUnicodeNFKC(namePrepStr, normlizedStr);
  if (normlizedStr.Length() >= kMaxDNSNodeLen)
    return NS_ERROR_MALFORMED_URI;

  // set the result string
  out.Assign(normlizedStr);

  if (flag == eStringPrepIgnoreErrors) {
    return NS_OK;
  }

  // prohibit
  // The checks below run on ucs4Buf, i.e. the code points of the original
  // input rather than the mapped/normalized text.
  const uint32_t *found = nullptr;
  idn_err = idn_nameprep_isprohibited(mNamePrepHandle,
                                      (const uint32_t *) ucs4Buf, &found);
  if (idn_err != idn_success || found) {
    rv = NS_ERROR_MALFORMED_URI;
  } else {
    // check bidi
    idn_err = idn_nameprep_isvalidbidi(mNamePrepHandle,
                                       (const uint32_t *) ucs4Buf, &found);
    if (idn_err != idn_success || found) {
      rv = NS_ERROR_MALFORMED_URI;
    } else if (flag == eStringPrepForUI) {
      // check unassigned code points
      idn_err = idn_nameprep_isunassigned(mNamePrepHandle,
                                          (const uint32_t *) ucs4Buf, &found);
      if (idn_err != idn_success || found) {
        rv = NS_ERROR_MALFORMED_URI;
      }
    }
  }

  // For DNS use a rejected label must not leak out; other callers keep the
  // prepared text alongside the error code.
  if (flag == eStringPrepForDNS && NS_FAILED(rv)) {
    out.Truncate();
  }

  return rv;
#endif
}
668 :
669 0 : nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
670 : stringPrepFlag flag)
671 : {
672 0 : nsresult rv = NS_OK;
673 :
674 0 : out.Truncate();
675 :
676 0 : if (in.Length() > kMaxDNSNodeLen) {
677 0 : NS_WARNING("IDN node too large");
678 0 : return NS_ERROR_MALFORMED_URI;
679 : }
680 :
681 0 : if (IsASCII(in)) {
682 0 : LossyCopyUTF16toASCII(in, out);
683 0 : return NS_OK;
684 : }
685 :
686 0 : nsAutoString strPrep;
687 0 : rv = stringPrep(in, strPrep, flag);
688 0 : if (flag == eStringPrepForDNS) {
689 0 : NS_ENSURE_SUCCESS(rv, rv);
690 : }
691 :
692 0 : if (IsASCII(strPrep)) {
693 0 : LossyCopyUTF16toASCII(strPrep, out);
694 0 : return NS_OK;
695 : }
696 :
697 0 : if (flag == eStringPrepForUI && NS_SUCCEEDED(rv) && isLabelSafe(in)) {
698 0 : CopyUTF16toUTF8(strPrep, out);
699 0 : return NS_OK;
700 : }
701 :
702 0 : rv = punycode(strPrep, out);
703 : // Check that the encoded output isn't larger than the maximum length
704 : // of a DNS node per RFC 1034.
705 : // This test isn't necessary in the code paths above where the input
706 : // is ASCII (since the output will be the same length as the input) or
707 : // where we convert to UTF-8 (since the output is only used for
708 : // display in the UI and not passed to DNS and can legitimately be
709 : // longer than the limit).
710 0 : if (out.Length() > kMaxDNSNodeLen) {
711 0 : NS_WARNING("IDN node too large");
712 0 : return NS_ERROR_MALFORMED_URI;
713 : }
714 :
715 0 : return rv;
716 : }
717 :
718 : // RFC 3490
719 : // 1) Whenever dots are used as label separators, the following characters
720 : // MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
721 : // stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full
722 : // stop).
723 :
724 0 : void nsIDNService::normalizeFullStops(nsAString& s)
725 : {
726 0 : nsAString::const_iterator start, end;
727 0 : s.BeginReading(start);
728 0 : s.EndReading(end);
729 0 : int32_t index = 0;
730 :
731 0 : while (start != end) {
732 0 : switch (*start) {
733 : case 0x3002:
734 : case 0xFF0E:
735 : case 0xFF61:
736 0 : s.Replace(index, 1, NS_LITERAL_STRING("."));
737 0 : break;
738 : default:
739 0 : break;
740 : }
741 0 : start++;
742 0 : index++;
743 : }
744 0 : }
745 :
// Decodes a single label from ACE to UTF-8.
//
//   in    one label (no dots expected).
//   out   the decoded label, or a copy of |in| when it is not ACE or, for
//         eStringPrepForUI, when the decoded label is not safe to display.
//   flag  error policy; also passed to the round-trip re-encoding below.
//
// Returns NS_ERROR_MALFORMED_URI (or another failure) when decoding or the
// round-trip validation fails.
nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out,
                                 stringPrepFlag flag)
{
  bool isAce;
  IsACE(in, &isAce);
  if (!isAce) {
    // Nothing to decode: pass the label through untouched.
    out.Assign(in);
    return NS_OK;
  }

  nsAutoString utf16;
#ifdef IDNA2008
  nsresult result = IDNA2008ToUnicode(in, utf16);
  NS_ENSURE_SUCCESS(result, result);
#else
  // RFC 3490 - 4.2 ToUnicode
  // The ToUnicode output never contains more code points than its input.
  // One extra slot is reserved for the terminator written after decoding.
  punycode_uint output_length = in.Length() - kACEPrefixLen + 1;
  auto *output = new punycode_uint[output_length];
  NS_ENSURE_TRUE(output, NS_ERROR_OUT_OF_MEMORY);

  // Decode everything after the "xn--" prefix; output_length is updated to
  // the number of code points produced.
  enum punycode_status status = punycode_decode(in.Length() - kACEPrefixLen,
                                                PromiseFlatCString(in).get() + kACEPrefixLen,
                                                &output_length,
                                                output,
                                                nullptr);
  if (status != punycode_success) {
    delete [] output;
    return NS_ERROR_MALFORMED_URI;
  }

  // UCS4 -> UTF16 (ucs4toUtf16 expects a zero-terminated array)
  output[output_length] = 0;
  ucs4toUtf16(output, utf16);
  delete [] output;
#endif
  // For display purposes an unsafe label stays in its ACE form.
  if (flag != eStringPrepForUI || isLabelSafe(utf16)) {
    CopyUTF16toUTF8(utf16, out);
  } else {
    out.Assign(in);
    return NS_OK;
  }

  // Validation: encode back to ACE and compare the strings
  nsAutoCString ace;
  nsresult rv = UTF8toACE(out, ace, flag);
  NS_ENSURE_SUCCESS(rv, rv);

  // Only the DNS policy insists on an exact (case-insensitive) round trip.
  if (flag == eStringPrepForDNS &&
      !ace.Equals(in, nsCaseInsensitiveCStringComparator())) {
    return NS_ERROR_MALFORMED_URI;
  }

  return NS_OK;
}
801 :
802 0 : bool nsIDNService::isInWhitelist(const nsACString &host)
803 : {
804 0 : if (!NS_IsMainThread()) {
805 0 : mLock.AssertCurrentThreadOwns();
806 : }
807 :
808 0 : if (mIDNUseWhitelist && mIDNWhitelistPrefBranch) {
809 0 : nsAutoCString tld(host);
810 : // make sure the host is ACE for lookup and check that there are no
811 : // unassigned codepoints
812 0 : if (!IsASCII(tld) && NS_FAILED(UTF8toACE(tld, tld, eStringPrepForDNS))) {
813 0 : return false;
814 : }
815 :
816 : // truncate trailing dots first
817 0 : tld.Trim(".");
818 0 : int32_t pos = tld.RFind(".");
819 0 : if (pos == kNotFound)
820 0 : return false;
821 :
822 0 : tld.Cut(0, pos + 1);
823 :
824 : bool safe;
825 0 : if (NS_SUCCEEDED(mIDNWhitelistPrefBranch->GetBoolPref(tld.get(), &safe)))
826 0 : return safe;
827 : }
828 :
829 0 : return false;
830 : }
831 :
// Decides whether a (non-ASCII) label may be displayed in Unicode.
//
// The label is rejected (returns false) when:
//   - it contains a character from the blacklist pref,
//   - the restriction profile is ASCII-only,
//   - any character has identifier type IDTYPE_RESTRICTED,
//   - its scripts form a combination illegalScriptCombo() refuses,
//   - it mixes decimal digits from different numbering systems, or
//   - the same non-spacing mark appears twice in a row.
// Otherwise returns true. Off the main thread the caller must hold mLock.
bool nsIDNService::isLabelSafe(const nsAString &label)
{
  if (!NS_IsMainThread()) {
    mLock.AssertCurrentThreadOwns();
  }

  if (!isOnlySafeChars(PromiseFlatString(label), mIDNBlacklist)) {
    return false;
  }

  // We should never get here if the label is ASCII
  NS_ASSERTION(!IsASCII(label), "ASCII label in IDN checking");
  if (mRestrictionProfile == eASCIIOnlyProfile) {
    return false;
  }

  nsAString::const_iterator current, end;
  label.BeginReading(current);
  label.EndReading(end);

  // State carried from one character to the next.
  Script lastScript = Script::INVALID;   // script of the last scripted char
  uint32_t previousChar = 0;             // previous code point, 0 at start
  uint32_t savedNumberingSystem = 0;     // zero digit of the first digit seen
// Simplified/Traditional Chinese check temporarily disabled -- bug 857481
#if 0
  HanVariantType savedHanVariant = HVT_NotHan;
#endif

  // Row index into scriptComboTable; -1 until the first script is seen.
  int32_t savedScript = -1;

  while (current != end) {
    uint32_t ch = *current++;

    // Combine a valid surrogate pair into one code point.
    if (NS_IS_HIGH_SURROGATE(ch) && current != end &&
        NS_IS_LOW_SURROGATE(*current)) {
      ch = SURROGATE_TO_UCS4(ch, *current++);
    }

    IdentifierType idType = GetIdentifierType(ch);
    if (idType == IDTYPE_RESTRICTED) {
      return false;
    }
    MOZ_ASSERT(idType == IDTYPE_ALLOWED);

    // Check for mixed script
    // COMMON and INHERITED characters never trigger a script change.
    Script script = GetScriptCode(ch);
    if (script != Script::COMMON &&
        script != Script::INHERITED &&
        script != lastScript) {
      if (illegalScriptCombo(script, savedScript)) {
        return false;
      }
      lastScript = script;
    }

    // Check for mixed numbering systems
    if (GetGeneralCategory(ch) ==
        HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
      // Subtracting the digit's value yields the code point of the zero
      // digit, which identifies the numbering system.
      uint32_t zeroCharacter = ch - GetNumericValue(ch);
      if (savedNumberingSystem == 0) {
        // If we encounter a decimal number, save the zero character from that
        // numbering system.
        savedNumberingSystem = zeroCharacter;
      } else if (zeroCharacter != savedNumberingSystem) {
        return false;
      }
    }

    // Check for consecutive non-spacing marks
    // (only an immediate repetition of the same mark is rejected)
    if (previousChar != 0 &&
        previousChar == ch &&
        GetGeneralCategory(ch) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
      return false;
    }

    // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
#if 0

    // Check for both simplified-only and traditional-only Chinese characters
    HanVariantType hanVariant = GetHanVariant(ch);
    if (hanVariant == HVT_SimplifiedOnly || hanVariant == HVT_TraditionalOnly) {
      if (savedHanVariant == HVT_NotHan) {
        savedHanVariant = hanVariant;
      } else if (hanVariant != savedHanVariant)  {
        return false;
      }
    }
#endif

    previousChar = ch;
  }
  return true;
}
925 :
926 : // Scripts that we care about in illegalScriptCombo
// The position of each script in this table is its column index in
// scriptComboTable and must match the BOPO..LATN macros below.
static const Script scriptTable[] = {
  Script::BOPOMOFO, Script::CYRILLIC, Script::GREEK,
  Script::HANGUL,   Script::HAN,      Script::HIRAGANA,
  Script::KATAKANA, Script::LATIN };

// Indices 0-7 mirror scriptTable; OTHR stands for any script not listed
// there. Values 9-12 are combined states that only ever appear as rows /
// results of scriptComboTable, and FAIL marks a forbidden combination.
#define BOPO 0
#define CYRL 1
#define GREK 2
#define HANG 3
#define HANI 4
#define HIRA 5
#define KATA 6
#define LATN 7
#define OTHR 8
#define JPAN 9    // Latin + Han + Hiragana + Katakana
#define CHNA 10   // Latin + Han + Bopomofo
#define KORE 11   // Latin + Han + Hangul
#define HNLT 12   // Latin + Han (could be any of the above combinations)
#define FAIL 13
946 :
947 0 : static inline int32_t findScriptIndex(Script aScript)
948 : {
949 0 : int32_t tableLength = mozilla::ArrayLength(scriptTable);
950 0 : for (int32_t index = 0; index < tableLength; ++index) {
951 0 : if (aScript == scriptTable[index]) {
952 0 : return index;
953 : }
954 : }
955 0 : return OTHR;
956 : }
957 :
// State-transition table used by illegalScriptCombo().
// Row: the combination accumulated so far (savedScript, one of BOPO..HNLT).
// Column: the script just encountered (findScriptIndex(), BOPO..OTHR).
// Cell: the new accumulated combination, or FAIL if the mix is not allowed.
static const int32_t scriptComboTable[13][9] = {
/* thisScript: BOPO  CYRL  GREK  HANG  HANI  HIRA  KATA  LATN  OTHR
 * savedScript */
 /* BOPO */  { BOPO, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL },
 /* CYRL */  { FAIL, CYRL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL },
 /* GREK */  { FAIL, FAIL, GREK, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL },
 /* HANG */  { FAIL, FAIL, FAIL, HANG, KORE, FAIL, FAIL, KORE, FAIL },
 /* HANI */  { CHNA, FAIL, FAIL, KORE, HANI, JPAN, JPAN, HNLT, FAIL },
 /* HIRA */  { FAIL, FAIL, FAIL, FAIL, JPAN, HIRA, JPAN, JPAN, FAIL },
 /* KATA */  { FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, KATA, JPAN, FAIL },
 /* LATN */  { CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, LATN, OTHR },
 /* OTHR */  { FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, OTHR, FAIL },
 /* JPAN */  { FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, JPAN, JPAN, FAIL },
 /* CHNA */  { CHNA, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL },
 /* KORE */  { FAIL, FAIL, FAIL, KORE, KORE, FAIL, FAIL, KORE, FAIL },
 /* HNLT */  { CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, HNLT, FAIL }
};
975 :
976 0 : bool nsIDNService::illegalScriptCombo(Script script, int32_t& savedScript)
977 : {
978 0 : if (!NS_IsMainThread()) {
979 0 : mLock.AssertCurrentThreadOwns();
980 : }
981 :
982 0 : if (savedScript == -1) {
983 0 : savedScript = findScriptIndex(script);
984 0 : return false;
985 : }
986 :
987 0 : savedScript = scriptComboTable[savedScript] [findScriptIndex(script)];
988 : /*
989 : * Special case combinations that depend on which profile is in use
990 : * In the Highly Restrictive profile Latin is not allowed with any
991 : * other script
992 : *
993 : * In the Moderately Restrictive profile Latin mixed with any other
994 : * single script is allowed.
995 : */
996 0 : return ((savedScript == OTHR &&
997 0 : mRestrictionProfile == eHighlyRestrictiveProfile) ||
998 0 : savedScript == FAIL);
999 : }
1000 :
1001 : #undef BOPO
1002 : #undef CYRL
1003 : #undef GREK
1004 : #undef HANG
1005 : #undef HANI
1006 : #undef HIRA
1007 : #undef KATA
1008 : #undef LATN
1009 : #undef OTHR
1010 : #undef JPAN
1011 : #undef CHNA
1012 : #undef KORE
1013 : #undef HNLT
1014 : #undef FAIL
|