Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : // This service reads a file of rules describing TLD-like domain names. For a
8 : // complete description of the expected file format and parsing rules, see
9 : // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
10 :
11 : #include "mozilla/ArrayUtils.h"
12 : #include "mozilla/MemoryReporting.h"
13 :
14 : #include "nsEffectiveTLDService.h"
15 : #include "nsIIDNService.h"
16 : #include "nsNetUtil.h"
17 : #include "prnetdb.h"
18 : #include "nsIURI.h"
19 : #include "nsNetCID.h"
20 : #include "nsServiceManagerUtils.h"
21 :
22 : using namespace mozilla;
23 : // Interface list for nsEffectiveTLDService: NS_IMPL_ISUPPORTS is given the class plus nsIEffectiveTLDService and nsIMemoryReporter.
24 117 : NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
25 : nsIMemoryReporter)
26 :
27 : // ----------------------------------------------------------------------
28 : // Helpers that map the current source line to a member of struct etld_string_list.
29 : #define ETLD_STR_NUM_1(line) str##line // pastes str and the line number into one identifier
30 : #define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line) // extra level so that __LINE__ is expanded before pasting
31 : #define ETLD_ENTRY_OFFSET(name) offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__)) // name is unused; yields the offset of the member named after the current line
32 : // Table of ETLDEntry records: every ETLD_ENTRY(name, ex, wild) in etld_data.inc expands to { offset, ex, wild }.
33 : const ETLDEntry ETLDEntry::entries[] = {
34 : #define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild },
35 : #include "etld_data.inc"
36 : #undef ETLD_ENTRY
37 : };
38 : // String data for the table: etld_data.inc is included again, this time with ETLD_ENTRY expanding to just the name.
39 : const union ETLDEntry::etld_strings ETLDEntry::strings = {
40 : {
41 : #define ETLD_ENTRY(name, ex, wild) name,
42 : #include "etld_data.inc"
43 : #undef ETLD_ENTRY
44 : }
45 : };
46 : // Looks up aDomain in ETLDEntry::entries by binary search; returns the matching entry, or nullptr if there is none.
47 : /* static */ const ETLDEntry*
48 59 : ETLDEntry::GetEntry(const char* aDomain)
49 : {
50 : size_t i; // filled in by BinarySearchIf; used as the index of the match when it returns true
51 118 : if (BinarySearchIf(entries, 0, ArrayLength(ETLDEntry::entries),
52 118 : Cmp(aDomain), &i)) {
53 0 : return &entries[i];
54 : }
55 59 : return nullptr;
56 : }
57 :
58 : // Dummy function whose body is only a list of static_asserts, one per etld_data.inc entry. Each one checks
59 : // at compile time that the string-table offset of the entry is below (1 << ETLD_ENTRY_N_INDEX_BITS).
60 : void
61 0 : ETLDEntry::FuncForStaticAsserts(void)
62 : {
63 : #define ETLD_ENTRY(name, ex, wild) \
64 : static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \
65 : "invalid strtab index");
66 : #include "etld_data.inc"
67 : #undef ETLD_ENTRY
68 0 : }
69 : // The offset helpers are only needed for the tables and asserts above; undefine all three of them.
70 : #undef ETLD_ENTRY_OFFSET
71 : #undef ETLD_STR_NUM
72 : #undef ETLD_STR_NUM_1
73 :
74 : // ----------------------------------------------------------------------
75 : // The live service instance: set in Init() and reset to nullptr in the destructor.
76 : static nsEffectiveTLDService *gService = nullptr;
77 : // The constructor body is empty; setup that can fail is done in Init().
78 2 : nsEffectiveTLDService::nsEffectiveTLDService()
79 : {
80 2 : }
81 : // Acquires the IDN service, validates the static eTLD table in DEBUG builds, then publishes this object as gService and registers it as a weak memory reporter. Returns the do_GetService failure code if that lookup fails, NS_OK otherwise.
82 : nsresult
83 2 : nsEffectiveTLDService::Init()
84 : {
85 : nsresult rv;
86 2 : mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv);
87 2 : if (NS_FAILED(rv)) return rv;
88 :
89 : #ifdef DEBUG
90 : // Sanity-check the eTLD entries: each stored name must survive NormalizeHostname() unchanged.
91 12544 : for (uint32_t i = 0; i < ArrayLength(ETLDEntry::entries); i++) {
92 12542 : const char* domain = ETLDEntry::entries[i].GetEffectiveTLDName();
93 25084 : nsDependentCString name(domain);
94 25084 : nsAutoCString normalizedName(domain);
95 12542 : MOZ_ASSERT(NS_SUCCEEDED(NormalizeHostname(normalizedName)),
96 : "normalization failure!");
97 12542 : MOZ_ASSERT(name.Equals(normalizedName), "domain not normalized!");
98 :
99 : // Domains must be in strictly ascending strcmp order for binary search to work.
100 12542 : if (i > 0) {
101 12540 : const char* domain0 = ETLDEntry::entries[i - 1].GetEffectiveTLDName();
102 12540 : MOZ_ASSERT(strcmp(domain0, domain) < 0, "domains not in sorted order!");
103 : }
104 : }
105 : #endif
106 :
107 2 : MOZ_ASSERT(!gService); // Init() must not run while another instance is registered
108 2 : gService = this;
109 2 : RegisterWeakMemoryReporter(this);
110 :
111 2 : return NS_OK;
112 : }
113 : // Undoes the registration done in Init(): unregisters the weak memory reporter and clears gService.
114 0 : nsEffectiveTLDService::~nsEffectiveTLDService()
115 : {
116 0 : UnregisterWeakMemoryReporter(this);
117 0 : gService = nullptr;
118 0 : }
119 : // Defines EffectiveTLDServiceMallocSizeOf, which CollectReports() passes to SizeOfIncludingThis().
120 0 : MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf)
121 : // Reports the size returned by SizeOfIncludingThis() as one KIND_HEAP / UNITS_BYTES entry and returns NS_OK.
122 : // The amount of heap memory measured here is tiny. It used to be bigger when
123 : // nsEffectiveTLDService used a separate hash table instead of binary search.
124 : // Nonetheless, we keep this code here in anticipation of bug 1083971 which will
125 : // change ETLDEntry::entries to a heap-allocated array modifiable at runtime.
126 : NS_IMETHODIMP
127 0 : nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport,
128 : nsISupports* aData, bool aAnonymize)
129 : {
130 0 : MOZ_COLLECT_REPORT(
131 : "explicit/network/effective-TLD-service", KIND_HEAP, UNITS_BYTES,
132 : SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf),
133 0 : "Memory used by the effective TLD service.");
134 :
135 0 : return NS_OK;
136 : }
137 : // Returns the size of this object as measured by aMallocSizeOf(this); no member is measured separately.
138 : size_t
139 0 : nsEffectiveTLDService::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
140 : {
141 0 : size_t n = aMallocSizeOf(this);
142 :
143 : // Measurement of the following members may be added later if DMD finds it is
144 : // worthwhile:
145 : // - mIDNService
146 :
147 0 : return n;
148 : }
149 :
150 : // External function for dealing with URIs correctly.
151 : // Pulls out the ASCII host of the innermost URI of aURI and hands it to
152 : // GetBaseDomainInternal() with 0 additional parts, i.e. the public suffix only.
153 : NS_IMETHODIMP
154 0 : nsEffectiveTLDService::GetPublicSuffix(nsIURI *aURI,
155 : nsACString &aPublicSuffix)
156 : {
157 0 : NS_ENSURE_ARG_POINTER(aURI);
158 :
159 0 : nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
160 0 : NS_ENSURE_ARG_POINTER(innerURI); // the innermost-URI lookup may yield null
161 :
162 0 : nsAutoCString host;
163 0 : nsresult rv = innerURI->GetAsciiHost(host);
164 0 : if (NS_FAILED(rv)) return rv;
165 :
166 0 : return GetBaseDomainInternal(host, 0, aPublicSuffix);
167 : }
168 :
169 : // External function for dealing with URIs correctly.
170 : // Pulls out the ASCII host of the innermost URI of aURI and hands it to GetBaseDomainInternal(), asking for
171 : // aAdditionalParts + 1 parts in addition to the public suffix. Returns NS_ERROR_INVALID_ARG if aAdditionalParts is INT32_MAX or larger.
172 : NS_IMETHODIMP
173 58 : nsEffectiveTLDService::GetBaseDomain(nsIURI *aURI,
174 : uint32_t aAdditionalParts,
175 : nsACString &aBaseDomain)
176 : {
177 58 : NS_ENSURE_ARG_POINTER(aURI);
178 58 : NS_ENSURE_TRUE(aAdditionalParts < uint32_t(INT32_MAX), NS_ERROR_INVALID_ARG); // strictly below INT32_MAX so that the + 1 below still fits the int32_t parameter and cannot turn negative
179 :
180 116 : nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
181 58 : NS_ENSURE_ARG_POINTER(innerURI); // the innermost-URI lookup may yield null
182 :
183 116 : nsAutoCString host;
184 58 : nsresult rv = innerURI->GetAsciiHost(host);
185 58 : if (NS_FAILED(rv)) return rv;
186 :
187 58 : return GetBaseDomainInternal(host, aAdditionalParts + 1, aBaseDomain);
188 : }
189 :
190 : // External function for dealing with a host string directly: finds the public
191 : // suffix (e.g. co.uk) for the given hostname by calling GetBaseDomainInternal() with 0 additional parts.
192 : NS_IMETHODIMP
193 0 : nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString &aHostname,
194 : nsACString &aPublicSuffix)
195 : {
196 : // Create a mutable copy of the hostname and normalize it to ACE.
197 : // This will fail if the hostname includes invalid characters.
198 0 : nsAutoCString normHostname(aHostname);
199 0 : nsresult rv = NormalizeHostname(normHostname);
200 0 : if (NS_FAILED(rv)) return rv;
201 :
202 0 : return GetBaseDomainInternal(normHostname, 0, aPublicSuffix);
203 : }
204 :
205 : // External function for dealing with a host string directly: finds the base
206 : // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
207 : // requested. See GetBaseDomainInternal(). Returns NS_ERROR_INVALID_ARG if aAdditionalParts is INT32_MAX or larger.
208 : NS_IMETHODIMP
209 2 : nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString &aHostname,
210 : uint32_t aAdditionalParts,
211 : nsACString &aBaseDomain)
212 : {
213 2 : NS_ENSURE_TRUE(aAdditionalParts < uint32_t(INT32_MAX), NS_ERROR_INVALID_ARG); // strictly below INT32_MAX so that the + 1 below still fits the int32_t parameter and cannot turn negative
214 :
215 : // Create a mutable copy of the hostname and normalize it to ACE.
216 : // This will fail if the hostname includes invalid characters.
217 4 : nsAutoCString normHostname(aHostname);
218 2 : nsresult rv = NormalizeHostname(normHostname);
219 2 : if (NS_FAILED(rv)) return rv;
220 :
221 2 : return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, aBaseDomain);
222 : }
223 : // Normalizes aHostname and calls GetBaseDomainInternal() with the special part count -1, which that function handles by dropping the leftmost label.
224 : NS_IMETHODIMP
225 1 : nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname,
226 : nsACString& aBaseDomain)
227 : {
228 : // Create a mutable copy of the hostname and normalize it to ACE.
229 : // This will fail if the hostname includes invalid characters.
230 2 : nsAutoCString normHostname(aHostname);
231 1 : nsresult rv = NormalizeHostname(normHostname);
232 1 : NS_ENSURE_SUCCESS(rv, rv);
233 :
234 1 : return GetBaseDomainInternal(normHostname, -1, aBaseDomain);
235 : }
236 :
237 : // Finds the base domain for a host: the effective TLD plus aAdditionalParts more labels to its left
238 : // (0 yields just the suffix). The special value -1 instead drops the leftmost label of aHostname.
239 : // Fails with NS_ERROR_HOST_IS_IP_ADDRESS for IPv4/IPv6 literals, with NS_ERROR_INVALID_ARG for a host that is just a dot, has two
240 : // trailing dots or shows an empty label during the walk, and with NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS when the host is empty or too short.
241 : // aHostname is modified (one trailing dot is removed). No normalization happens here; the callers in this file pass a NormalizeHostname() result or GetAsciiHost() output.
242 : nsresult
243 61 : nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname,
244 : int32_t aAdditionalParts,
245 : nsACString &aBaseDomain)
246 : {
247 61 : if (aHostname.IsEmpty())
248 0 : return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
249 :
250 : // chomp any trailing dot, and keep track of it for later
251 61 : bool trailingDot = aHostname.Last() == '.';
252 61 : if (trailingDot)
253 0 : aHostname.Truncate(aHostname.Length() - 1);
254 :
255 : // check the edge cases of the host being '.' or having a second trailing '.',
256 : // since subsequent checks won't catch it.
257 61 : if (aHostname.IsEmpty() || aHostname.Last() == '.')
258 0 : return NS_ERROR_INVALID_ARG;
259 :
260 : // Check if we're dealing with an IPv4/IPv6 hostname, and reject it with a dedicated error code
261 : PRNetAddr addr;
262 61 : PRStatus result = PR_StringToNetAddr(aHostname.get(), &addr);
263 61 : if (result == PR_SUCCESS)
264 2 : return NS_ERROR_HOST_IS_IP_ADDRESS;
265 :
266 : // Walk up the domain tree, most specific to least specific,
267 : // looking for matches at each level. Note that a given level may
268 : // have multiple attributes (e.g. IsWild() and IsNormal()).
269 59 : const char *prevDomain = nullptr; // candidate looked up in the previous iteration (one label longer)
270 59 : const char *currDomain = aHostname.get(); // suffix of the host currently being looked up
271 59 : const char *nextDot = strchr(currDomain, '.'); // first dot inside currDomain, or null at the top level
272 59 : const char *end = currDomain + aHostname.Length(); // one past the last character of the host
273 : // eTLD is always assigned inside the loop below before it is left via break
274 59 : const char *eTLD = nullptr;
275 : while (true) {
276 : // sanity check the string we're about to look up: it should not begin with
277 : // a '.'; this would mean the hostname began with a '.' or had an
278 : // embedded '..' sequence.
279 59 : if (*currDomain == '.')
280 0 : return NS_ERROR_INVALID_ARG;
281 :
282 : // Perform the lookup.
283 59 : const ETLDEntry* entry = ETLDEntry::GetEntry(currDomain);
284 59 : if (entry) {
285 0 : if (entry->IsWild() && prevDomain) {
286 : // wildcard rules imply an eTLD one level inferior to the match.
287 0 : eTLD = prevDomain;
288 0 : break;
289 : }
290 0 : if (entry->IsNormal() || !nextDot) {
291 : // specific match, or we've hit the top domain level
292 0 : eTLD = currDomain;
293 0 : break;
294 : }
295 0 : if (entry->IsException()) {
296 : // exception rules imply an eTLD one level superior to the match.
297 0 : eTLD = nextDot + 1;
298 0 : break;
299 : }
300 : }
301 59 : if (!nextDot) {
302 : // we've hit the top domain level; use it by default.
303 59 : eTLD = currDomain;
304 59 : break;
305 : }
306 :
307 0 : prevDomain = currDomain; // move one label to the right and try again
308 0 : currDomain = nextDot + 1;
309 0 : nextDot = strchr(currDomain, '.');
310 0 : }
311 :
312 : const char *begin, *iter; // begin is only used in the counting branch; iter ends up at the start of the result
313 59 : if (aAdditionalParts < 0) {
314 1 : NS_ASSERTION(aAdditionalParts == -1,
315 : "aAdditionalParts can't be negative and different from -1");
316 :
317 1 : for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++); // advance to the first dot, or stop on reaching eTLD
318 :
319 1 : if (iter != eTLD) {
320 0 : iter++; // step past that dot
321 : }
322 1 : if (iter != eTLD) {
323 0 : aAdditionalParts = 0; // what remains is longer than the eTLD: mark success for the check below
324 : }
325 : } else {
326 : // count off the number of requested domains, scanning backwards from the eTLD.
327 58 : begin = aHostname.get();
328 58 : iter = eTLD;
329 :
330 : while (true) {
331 58 : if (iter == begin)
332 58 : break;
333 :
334 0 : if (*(--iter) == '.' && aAdditionalParts-- == 0) { // each dot crossed uses up one requested part; a dot met with none left marks the cut
335 0 : ++iter; // result starts just after that dot
336 0 : ++aAdditionalParts; // undo the extra decrement so the count reads 0
337 0 : break;
338 : }
339 : }
340 : }
341 :
342 59 : if (aAdditionalParts != 0)
343 59 : return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
344 :
345 0 : aBaseDomain = Substring(iter, end);
346 : // add on the trailing dot, if applicable
347 0 : if (trailingDot)
348 0 : aBaseDomain.Append('.');
349 :
350 0 : return NS_OK;
351 : }
352 :
353 : // Normalizes the given hostname in place. A hostname that is not pure ASCII
354 : // is first converted from UTF-8 to ACE by the IDN service, and a failure of
355 : // that conversion is returned to the caller; the result is then lower-cased.
356 : nsresult
357 12545 : nsEffectiveTLDService::NormalizeHostname(nsCString &aHostname)
358 : {
359 12545 : if (!IsASCII(aHostname)) {
360 0 : nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname);
361 0 : if (NS_FAILED(rv))
362 0 : return rv;
363 : }
364 :
365 12545 : ToLowerCase(aHostname);
366 12545 : return NS_OK;
367 : }
|