/src/mozilla-central/netwerk/dns/nsEffectiveTLDService.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | // This service reads a file of rules describing TLD-like domain names. For a |
8 | | // complete description of the expected file format and parsing rules, see |
9 | | // http://wiki.mozilla.org/Gecko:Effective_TLD_Service |
10 | | |
11 | | #include "mozilla/ArrayUtils.h" |
12 | | #include "mozilla/HashFunctions.h" |
13 | | #include "mozilla/Maybe.h" |
14 | | #include "mozilla/MemoryReporting.h" |
15 | | |
16 | | #include "MainThreadUtils.h" |
17 | | #include "nsEffectiveTLDService.h" |
18 | | #include "nsIIDNService.h" |
19 | | #include "nsNetUtil.h" |
20 | | #include "prnetdb.h" |
21 | | #include "nsIURI.h" |
22 | | #include "nsNetCID.h" |
23 | | #include "nsServiceManagerUtils.h" |
24 | | |
25 | | namespace etld_dafsa { |
26 | | |
27 | | // Generated file that includes kDafsa |
28 | | #include "etld_data.inc" |
29 | | |
30 | | } // namespace etld_dafsa |
31 | | |
32 | | using namespace mozilla; |
33 | | |
34 | | NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService, |
35 | | nsIMemoryReporter) |
36 | | |
37 | | // ---------------------------------------------------------------------- |
38 | | |
39 | | static nsEffectiveTLDService *gService = nullptr; |
40 | | |
41 | | nsEffectiveTLDService::nsEffectiveTLDService() |
42 | | : mIDNService() |
43 | | , mGraph(etld_dafsa::kDafsa) |
44 | 0 | { |
45 | 0 | } |
46 | | |
47 | | nsresult |
48 | | nsEffectiveTLDService::Init() |
49 | 0 | { |
50 | 0 | nsresult rv; |
51 | 0 | mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv); |
52 | 0 | if (NS_FAILED(rv)) return rv; |
53 | 0 | |
54 | 0 | MOZ_ASSERT(!gService); |
55 | 0 | gService = this; |
56 | 0 | RegisterWeakMemoryReporter(this); |
57 | 0 |
|
58 | 0 | return NS_OK; |
59 | 0 | } |
60 | | |
61 | | nsEffectiveTLDService::~nsEffectiveTLDService() |
62 | 0 | { |
63 | 0 | UnregisterWeakMemoryReporter(this); |
64 | 0 | gService = nullptr; |
65 | 0 | } |
66 | | |
67 | | MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf) |
68 | | |
69 | | // The amount of heap memory measured here is tiny. It used to be bigger when |
70 | | // nsEffectiveTLDService used a separate hash table instead of binary search. |
71 | | // Nonetheless, we keep this code here in anticipation of bug 1083971 which will |
72 | | // change ETLDEntries::entries to a heap-allocated array modifiable at runtime. |
73 | | NS_IMETHODIMP |
74 | | nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport, |
75 | | nsISupports* aData, bool aAnonymize) |
76 | 0 | { |
77 | 0 | MOZ_COLLECT_REPORT( |
78 | 0 | "explicit/network/effective-TLD-service", KIND_HEAP, UNITS_BYTES, |
79 | 0 | SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf), |
80 | 0 | "Memory used by the effective TLD service."); |
81 | 0 |
|
82 | 0 | return NS_OK; |
83 | 0 | } |
84 | | |
85 | | size_t |
86 | | nsEffectiveTLDService::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) |
87 | 0 | { |
88 | 0 | size_t n = aMallocSizeOf(this); |
89 | 0 |
|
90 | 0 | // Measurement of the following members may be added later if DMD finds it is |
91 | 0 | // worthwhile: |
92 | 0 | // - mIDNService |
93 | 0 |
|
94 | 0 | return n; |
95 | 0 | } |
96 | | |
97 | | // External function for dealing with URI's correctly. |
98 | | // Pulls out the host portion from an nsIURI, and calls through to |
99 | | // GetPublicSuffixFromHost(). |
100 | | NS_IMETHODIMP |
101 | | nsEffectiveTLDService::GetPublicSuffix(nsIURI *aURI, |
102 | | nsACString &aPublicSuffix) |
103 | 0 | { |
104 | 0 | NS_ENSURE_ARG_POINTER(aURI); |
105 | 0 |
|
106 | 0 | nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI); |
107 | 0 | NS_ENSURE_ARG_POINTER(innerURI); |
108 | 0 |
|
109 | 0 | nsAutoCString host; |
110 | 0 | nsresult rv = innerURI->GetAsciiHost(host); |
111 | 0 | if (NS_FAILED(rv)) return rv; |
112 | 0 | |
113 | 0 | return GetBaseDomainInternal(host, 0, aPublicSuffix); |
114 | 0 | } |
115 | | |
116 | | // External function for dealing with URI's correctly. |
117 | | // Pulls out the host portion from an nsIURI, and calls through to |
118 | | // GetBaseDomainFromHost(). |
119 | | NS_IMETHODIMP |
120 | | nsEffectiveTLDService::GetBaseDomain(nsIURI *aURI, |
121 | | uint32_t aAdditionalParts, |
122 | | nsACString &aBaseDomain) |
123 | 0 | { |
124 | 0 | NS_ENSURE_ARG_POINTER(aURI); |
125 | 0 | NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG); |
126 | 0 |
|
127 | 0 | nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI); |
128 | 0 | NS_ENSURE_ARG_POINTER(innerURI); |
129 | 0 |
|
130 | 0 | nsAutoCString host; |
131 | 0 | nsresult rv = innerURI->GetAsciiHost(host); |
132 | 0 | if (NS_FAILED(rv)) return rv; |
133 | 0 | |
134 | 0 | return GetBaseDomainInternal(host, aAdditionalParts + 1, aBaseDomain); |
135 | 0 | } |
136 | | |
137 | | // External function for dealing with a host string directly: finds the public |
138 | | // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal(). |
139 | | NS_IMETHODIMP |
140 | | nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString &aHostname, |
141 | | nsACString &aPublicSuffix) |
142 | 0 | { |
143 | 0 | // Create a mutable copy of the hostname and normalize it to ACE. |
144 | 0 | // This will fail if the hostname includes invalid characters. |
145 | 0 | nsAutoCString normHostname(aHostname); |
146 | 0 | nsresult rv = NormalizeHostname(normHostname); |
147 | 0 | if (NS_FAILED(rv)) return rv; |
148 | 0 | |
149 | 0 | return GetBaseDomainInternal(normHostname, 0, aPublicSuffix); |
150 | 0 | } |
151 | | |
152 | | // External function for dealing with a host string directly: finds the base |
153 | | // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts |
154 | | // requested. See GetBaseDomainInternal(). |
155 | | NS_IMETHODIMP |
156 | | nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString &aHostname, |
157 | | uint32_t aAdditionalParts, |
158 | | nsACString &aBaseDomain) |
159 | 0 | { |
160 | 0 | NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG); |
161 | 0 |
|
162 | 0 | // Create a mutable copy of the hostname and normalize it to ACE. |
163 | 0 | // This will fail if the hostname includes invalid characters. |
164 | 0 | nsAutoCString normHostname(aHostname); |
165 | 0 | nsresult rv = NormalizeHostname(normHostname); |
166 | 0 | if (NS_FAILED(rv)) return rv; |
167 | 0 | |
168 | 0 | return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, aBaseDomain); |
169 | 0 | } |
170 | | |
171 | | NS_IMETHODIMP |
172 | | nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname, |
173 | | nsACString& aBaseDomain) |
174 | 0 | { |
175 | 0 | // Create a mutable copy of the hostname and normalize it to ACE. |
176 | 0 | // This will fail if the hostname includes invalid characters. |
177 | 0 | nsAutoCString normHostname(aHostname); |
178 | 0 | nsresult rv = NormalizeHostname(normHostname); |
179 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
180 | 0 |
|
181 | 0 | return GetBaseDomainInternal(normHostname, -1, aBaseDomain); |
182 | 0 | } |
183 | | |
184 | | // Finds the base domain for a host, with requested number of additional parts. |
185 | | // This will fail, generating an error, if the host is an IPv4/IPv6 address, |
186 | | // if more subdomain parts are requested than are available, or if the hostname |
187 | | // includes characters that are not valid in a URL. Normalization is performed |
188 | | // on the host string and the result will be in UTF8. |
189 | | nsresult |
190 | | nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname, |
191 | | int32_t aAdditionalParts, |
192 | | nsACString &aBaseDomain) |
193 | 0 | { |
194 | 0 | const int kExceptionRule = 1; |
195 | 0 | const int kWildcardRule = 2; |
196 | 0 |
|
197 | 0 | if (aHostname.IsEmpty()) |
198 | 0 | return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; |
199 | 0 | |
200 | 0 | // chomp any trailing dot, and keep track of it for later |
201 | 0 | bool trailingDot = aHostname.Last() == '.'; |
202 | 0 | if (trailingDot) |
203 | 0 | aHostname.Truncate(aHostname.Length() - 1); |
204 | 0 |
|
205 | 0 | // check the edge cases of the host being '.' or having a second trailing '.', |
206 | 0 | // since subsequent checks won't catch it. |
207 | 0 | if (aHostname.IsEmpty() || aHostname.Last() == '.') |
208 | 0 | return NS_ERROR_INVALID_ARG; |
209 | 0 | |
210 | 0 | // Check if we're dealing with an IPv4/IPv6 hostname, and return |
211 | 0 | PRNetAddr addr; |
212 | 0 | PRStatus result = PR_StringToNetAddr(aHostname.get(), &addr); |
213 | 0 | if (result == PR_SUCCESS) |
214 | 0 | return NS_ERROR_HOST_IS_IP_ADDRESS; |
215 | 0 | |
216 | 0 | // Lookup in the cache if this is a normal query. This is restricted to |
217 | 0 | // main thread-only as the cache is not thread-safe. |
218 | 0 | Maybe<TldCache::Entry> entry; |
219 | 0 | if (aAdditionalParts == 1 && NS_IsMainThread()) { |
220 | 0 | auto p = mMruTable.Lookup(aHostname); |
221 | 0 | if (p) { |
222 | 0 | // There was a match, just return the cached value. |
223 | 0 | aBaseDomain = p.Data().mBaseDomain; |
224 | 0 | if (trailingDot) { |
225 | 0 | aBaseDomain.Append('.'); |
226 | 0 | } |
227 | 0 |
|
228 | 0 | return NS_OK; |
229 | 0 | } |
230 | 0 |
|
231 | 0 | entry = Some(p); |
232 | 0 | } |
233 | 0 |
|
234 | 0 | // Walk up the domain tree, most specific to least specific, |
235 | 0 | // looking for matches at each level. Note that a given level may |
236 | 0 | // have multiple attributes (e.g. IsWild() and IsNormal()). |
237 | 0 | const char *prevDomain = nullptr; |
238 | 0 | const char *currDomain = aHostname.get(); |
239 | 0 | const char *nextDot = strchr(currDomain, '.'); |
240 | 0 | const char *end = currDomain + aHostname.Length(); |
241 | 0 | // Default value of *eTLD is currDomain as set in the while loop below |
242 | 0 | const char *eTLD = nullptr; |
243 | 0 | while (true) { |
244 | 0 | // sanity check the string we're about to look up: it should not begin with |
245 | 0 | // a '.'; this would mean the hostname began with a '.' or had an |
246 | 0 | // embedded '..' sequence. |
247 | 0 | if (*currDomain == '.') |
248 | 0 | return NS_ERROR_INVALID_ARG; |
249 | 0 | |
250 | 0 | // Perform the lookup. |
251 | 0 | const int result = mGraph.Lookup(Substring(currDomain, end)); |
252 | 0 | if (result != Dafsa::kKeyNotFound) { |
253 | 0 | if (result == kWildcardRule && prevDomain) { |
254 | 0 | // wildcard rules imply an eTLD one level inferior to the match. |
255 | 0 | eTLD = prevDomain; |
256 | 0 | break; |
257 | 0 | } |
258 | 0 | if ((result == kWildcardRule || result != kExceptionRule) || !nextDot) { |
259 | 0 | // specific match, or we've hit the top domain level |
260 | 0 | eTLD = currDomain; |
261 | 0 | break; |
262 | 0 | } |
263 | 0 | if (result == kExceptionRule) { |
264 | 0 | // exception rules imply an eTLD one level superior to the match. |
265 | 0 | eTLD = nextDot + 1; |
266 | 0 | break; |
267 | 0 | } |
268 | 0 | } |
269 | 0 | if (!nextDot) { |
270 | 0 | // we've hit the top domain level; use it by default. |
271 | 0 | eTLD = currDomain; |
272 | 0 | break; |
273 | 0 | } |
274 | 0 | |
275 | 0 | prevDomain = currDomain; |
276 | 0 | currDomain = nextDot + 1; |
277 | 0 | nextDot = strchr(currDomain, '.'); |
278 | 0 | } |
279 | 0 |
|
280 | 0 | const char *begin, *iter; |
281 | 0 | if (aAdditionalParts < 0) { |
282 | 0 | NS_ASSERTION(aAdditionalParts == -1, |
283 | 0 | "aAdditionalParts can't be negative and different from -1"); |
284 | 0 |
|
285 | 0 | for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++); |
286 | 0 |
|
287 | 0 | if (iter != eTLD) { |
288 | 0 | iter++; |
289 | 0 | } |
290 | 0 | if (iter != eTLD) { |
291 | 0 | aAdditionalParts = 0; |
292 | 0 | } |
293 | 0 | } else { |
294 | 0 | // count off the number of requested domains. |
295 | 0 | begin = aHostname.get(); |
296 | 0 | iter = eTLD; |
297 | 0 |
|
298 | 0 | while (true) { |
299 | 0 | if (iter == begin) |
300 | 0 | break; |
301 | 0 | |
302 | 0 | if (*(--iter) == '.' && aAdditionalParts-- == 0) { |
303 | 0 | ++iter; |
304 | 0 | ++aAdditionalParts; |
305 | 0 | break; |
306 | 0 | } |
307 | 0 | } |
308 | 0 | } |
309 | 0 |
|
310 | 0 | if (aAdditionalParts != 0) |
311 | 0 | return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; |
312 | 0 | |
313 | 0 | aBaseDomain = Substring(iter, end); |
314 | 0 |
|
315 | 0 | // Update the MRU table if in use. |
316 | 0 | if (entry) { |
317 | 0 | entry->Set(TLDCacheEntry{aHostname, nsCString(aBaseDomain)}); |
318 | 0 | } |
319 | 0 |
|
320 | 0 | // add on the trailing dot, if applicable |
321 | 0 | if (trailingDot) |
322 | 0 | aBaseDomain.Append('.'); |
323 | 0 |
|
324 | 0 | return NS_OK; |
325 | 0 | } |
326 | | |
327 | | // Normalizes the given hostname, component by component. ASCII/ACE |
328 | | // components are lower-cased, and UTF-8 components are normalized per |
329 | | // RFC 3454 and converted to ACE. |
330 | | nsresult |
331 | | nsEffectiveTLDService::NormalizeHostname(nsCString &aHostname) |
332 | 0 | { |
333 | 0 | if (!IsASCII(aHostname)) { |
334 | 0 | nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname); |
335 | 0 | if (NS_FAILED(rv)) |
336 | 0 | return rv; |
337 | 0 | } |
338 | 0 | |
339 | 0 | ToLowerCase(aHostname); |
340 | 0 | return NS_OK; |
341 | 0 | } |
342 | | |
343 | | NS_IMETHODIMP |
344 | | nsEffectiveTLDService::HasRootDomain(const nsACString& aInput, |
345 | | const nsACString& aHost, |
346 | | bool* aResult) |
347 | 0 | { |
348 | 0 | if (NS_WARN_IF(!aResult)) { |
349 | 0 | return NS_ERROR_FAILURE; |
350 | 0 | } |
351 | 0 | |
352 | 0 | *aResult = false; |
353 | 0 |
|
354 | 0 | // If the strings are the same, we obviously have a match. |
355 | 0 | if (aInput == aHost) { |
356 | 0 | *aResult = true; |
357 | 0 | return NS_OK; |
358 | 0 | } |
359 | 0 | |
360 | 0 | // If aHost is not found, we know we do not have it as a root domain. |
361 | 0 | int32_t index = nsAutoCString(aInput).Find(aHost.BeginReading()); |
362 | 0 | if (index == kNotFound) { |
363 | 0 | return NS_OK; |
364 | 0 | } |
365 | 0 | |
366 | 0 | // Otherwise, we have aHost as our root domain iff the index of aHost is |
367 | 0 | // aHost.length subtracted from our length and (since we do not have an |
368 | 0 | // exact match) the character before the index is a dot or slash. |
369 | 0 | *aResult = index > 0 && |
370 | 0 | (uint32_t)index == aInput.Length() - aHost.Length() && |
371 | 0 | (aInput[index - 1] == '.' || aInput[index - 1] == '/'); |
372 | 0 | return NS_OK; |
373 | 0 | } |