/src/mozilla-central/netwerk/base/nsURLHelper.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* vim:set ts=4 sw=4 sts=4 et cindent: */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | #include "mozilla/RangedPtr.h" |
8 | | #include "mozilla/TextUtils.h" |
9 | | |
10 | | #include <algorithm> |
11 | | #include <iterator> |
12 | | |
13 | | #include "nsASCIIMask.h" |
14 | | #include "nsURLHelper.h" |
15 | | #include "nsIFile.h" |
16 | | #include "nsIURLParser.h" |
17 | | #include "nsCOMPtr.h" |
18 | | #include "nsCRT.h" |
19 | | #include "nsNetCID.h" |
20 | | #include "mozilla/Preferences.h" |
21 | | #include "prnetdb.h" |
22 | | #include "mozilla/Tokenizer.h" |
23 | | #include "nsEscape.h" |
24 | | |
25 | | using namespace mozilla; |
26 | | |
27 | | //---------------------------------------------------------------------------- |
28 | | // Init/Shutdown |
29 | | //---------------------------------------------------------------------------- |
30 | | |
// File-level state shared by the helpers below.
//
// gInitialized is set by InitGlobals() and cleared by net_ShutdownURLHelper().
31 | | static bool gInitialized = false; |
// Cached URL parser services. InitGlobals() stores an AddRef'ed pointer in
// each of these; net_ShutdownURLHelper() releases them.
32 | | static nsIURLParser *gNoAuthURLParser = nullptr; |
33 | | static nsIURLParser *gAuthURLParser = nullptr; |
34 | | static nsIURLParser *gStdURLParser = nullptr; |
// Maximum accepted URL length. InitGlobals() registers this variable as a
// cache for the "network.standard-url.max-length" preference.
35 | | static int32_t gMaxLength = 1048576; // Default: 1MB |
36 | | |
37 | | static void |
38 | | InitGlobals() |
39 | 3 | { |
40 | 3 | nsCOMPtr<nsIURLParser> parser; |
41 | 3 | |
42 | 3 | parser = do_GetService(NS_NOAUTHURLPARSER_CONTRACTID); |
43 | 3 | NS_ASSERTION(parser, "failed getting 'noauth' url parser"); |
44 | 3 | if (parser) { |
45 | 3 | gNoAuthURLParser = parser.get(); |
46 | 3 | NS_ADDREF(gNoAuthURLParser); |
47 | 3 | } |
48 | 3 | |
49 | 3 | parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID); |
50 | 3 | NS_ASSERTION(parser, "failed getting 'auth' url parser"); |
51 | 3 | if (parser) { |
52 | 3 | gAuthURLParser = parser.get(); |
53 | 3 | NS_ADDREF(gAuthURLParser); |
54 | 3 | } |
55 | 3 | |
56 | 3 | parser = do_GetService(NS_STDURLPARSER_CONTRACTID); |
57 | 3 | NS_ASSERTION(parser, "failed getting 'std' url parser"); |
58 | 3 | if (parser) { |
59 | 3 | gStdURLParser = parser.get(); |
60 | 3 | NS_ADDREF(gStdURLParser); |
61 | 3 | } |
62 | 3 | |
63 | 3 | gInitialized = true; |
64 | 3 | Preferences::AddIntVarCache(&gMaxLength, |
65 | 3 | "network.standard-url.max-length", 1048576); |
66 | 3 | } |
67 | | |
68 | | void |
69 | | net_ShutdownURLHelper() |
70 | 0 | { |
71 | 0 | if (gInitialized) { |
72 | 0 | NS_IF_RELEASE(gNoAuthURLParser); |
73 | 0 | NS_IF_RELEASE(gAuthURLParser); |
74 | 0 | NS_IF_RELEASE(gStdURLParser); |
75 | 0 | gInitialized = false; |
76 | 0 | } |
77 | 0 | } |
78 | | |
79 | | int32_t net_GetURLMaxLength() |
80 | 5.56M | { |
81 | 5.56M | return gMaxLength; |
82 | 5.56M | } |
83 | | |
84 | | //---------------------------------------------------------------------------- |
85 | | // nsIURLParser getters |
86 | | //---------------------------------------------------------------------------- |
87 | | |
88 | | nsIURLParser * |
89 | | net_GetAuthURLParser() |
90 | 1.08M | { |
91 | 1.08M | if (!gInitialized) |
92 | 0 | InitGlobals(); |
93 | 1.08M | return gAuthURLParser; |
94 | 1.08M | } |
95 | | |
96 | | nsIURLParser * |
97 | | net_GetNoAuthURLParser() |
98 | 7.01k | { |
99 | 7.01k | if (!gInitialized) |
100 | 0 | InitGlobals(); |
101 | 7.01k | return gNoAuthURLParser; |
102 | 7.01k | } |
103 | | |
104 | | nsIURLParser * |
105 | | net_GetStdURLParser() |
106 | 2.28M | { |
107 | 2.28M | if (!gInitialized) |
108 | 3 | InitGlobals(); |
109 | 2.28M | return gStdURLParser; |
110 | 2.28M | } |
111 | | |
112 | | //--------------------------------------------------------------------------- |
113 | | // GetFileFromURLSpec implementations |
114 | | //--------------------------------------------------------------------------- |
115 | | nsresult |
116 | | net_GetURLSpecFromDir(nsIFile *aFile, nsACString &result) |
117 | 0 | { |
118 | 0 | nsAutoCString escPath; |
119 | 0 | nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath); |
120 | 0 | if (NS_FAILED(rv)) |
121 | 0 | return rv; |
122 | 0 | |
123 | 0 | if (escPath.Last() != '/') { |
124 | 0 | escPath += '/'; |
125 | 0 | } |
126 | 0 |
|
127 | 0 | result = escPath; |
128 | 0 | return NS_OK; |
129 | 0 | } |
130 | | |
131 | | nsresult |
132 | | net_GetURLSpecFromFile(nsIFile *aFile, nsACString &result) |
133 | 0 | { |
134 | 0 | nsAutoCString escPath; |
135 | 0 | nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath); |
136 | 0 | if (NS_FAILED(rv)) |
137 | 0 | return rv; |
138 | 0 | |
139 | 0 | // if this file references a directory, then we need to ensure that the |
140 | 0 | // URL ends with a slash. this is important since it affects the rules |
141 | 0 | // for relative URL resolution when this URL is used as a base URL. |
142 | 0 | // if the file does not exist, then we make no assumption about its type, |
143 | 0 | // and simply leave the URL unmodified. |
144 | 0 | if (escPath.Last() != '/') { |
145 | 0 | bool dir; |
146 | 0 | rv = aFile->IsDirectory(&dir); |
147 | 0 | if (NS_SUCCEEDED(rv) && dir) |
148 | 0 | escPath += '/'; |
149 | 0 | } |
150 | 0 |
|
151 | 0 | result = escPath; |
152 | 0 | return NS_OK; |
153 | 0 | } |
154 | | |
155 | | //---------------------------------------------------------------------------- |
156 | | // file:// URL parsing |
157 | | //---------------------------------------------------------------------------- |
158 | | |
159 | | nsresult |
160 | | net_ParseFileURL(const nsACString &inURL, |
161 | | nsACString &outDirectory, |
162 | | nsACString &outFileBaseName, |
163 | | nsACString &outFileExtension) |
164 | 9 | { |
165 | 9 | nsresult rv; |
166 | 9 | |
167 | 9 | if (inURL.Length() > (uint32_t) gMaxLength) { |
168 | 0 | return NS_ERROR_MALFORMED_URI; |
169 | 0 | } |
170 | 9 | |
171 | 9 | outDirectory.Truncate(); |
172 | 9 | outFileBaseName.Truncate(); |
173 | 9 | outFileExtension.Truncate(); |
174 | 9 | |
175 | 9 | const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL); |
176 | 9 | const char *url = flatURL.get(); |
177 | 9 | |
178 | 9 | nsAutoCString scheme; |
179 | 9 | rv = net_ExtractURLScheme(flatURL, scheme); |
180 | 9 | if (NS_FAILED(rv)) return rv; |
181 | 9 | |
182 | 9 | if (!scheme.EqualsLiteral("file")) { |
183 | 0 | NS_ERROR("must be a file:// url"); |
184 | 0 | return NS_ERROR_UNEXPECTED; |
185 | 0 | } |
186 | 9 | |
187 | 9 | nsIURLParser *parser = net_GetNoAuthURLParser(); |
188 | 9 | NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED); |
189 | 9 | |
190 | 9 | uint32_t pathPos, filepathPos, directoryPos, basenamePos, extensionPos; |
191 | 9 | int32_t pathLen, filepathLen, directoryLen, basenameLen, extensionLen; |
192 | 9 | |
193 | 9 | // invoke the parser to extract the URL path |
194 | 9 | rv = parser->ParseURL(url, flatURL.Length(), |
195 | 9 | nullptr, nullptr, // don't care about scheme |
196 | 9 | nullptr, nullptr, // don't care about authority |
197 | 9 | &pathPos, &pathLen); |
198 | 9 | if (NS_FAILED(rv)) return rv; |
199 | 9 | |
200 | 9 | // invoke the parser to extract filepath from the path |
201 | 9 | rv = parser->ParsePath(url + pathPos, pathLen, |
202 | 9 | &filepathPos, &filepathLen, |
203 | 9 | nullptr, nullptr, // don't care about query |
204 | 9 | nullptr, nullptr); // don't care about ref |
205 | 9 | if (NS_FAILED(rv)) return rv; |
206 | 9 | |
207 | 9 | filepathPos += pathPos; |
208 | 9 | |
209 | 9 | // invoke the parser to extract the directory and filename from filepath |
210 | 9 | rv = parser->ParseFilePath(url + filepathPos, filepathLen, |
211 | 9 | &directoryPos, &directoryLen, |
212 | 9 | &basenamePos, &basenameLen, |
213 | 9 | &extensionPos, &extensionLen); |
214 | 9 | if (NS_FAILED(rv)) return rv; |
215 | 9 | |
216 | 9 | if (directoryLen > 0) |
217 | 9 | outDirectory = Substring(inURL, filepathPos + directoryPos, directoryLen); |
218 | 9 | if (basenameLen > 0) |
219 | 9 | outFileBaseName = Substring(inURL, filepathPos + basenamePos, basenameLen); |
220 | 9 | if (extensionLen > 0) |
221 | 9 | outFileExtension = Substring(inURL, filepathPos + extensionPos, extensionLen); |
222 | 9 | // since we are using a no-auth url parser, there will never be a host |
223 | 9 | // XXX not strictly true... file://localhost/foo/bar.html is a valid URL |
224 | 9 | |
225 | 9 | return NS_OK; |
226 | 9 | } |
227 | | |
228 | | //---------------------------------------------------------------------------- |
229 | | // path manipulation functions |
230 | | //---------------------------------------------------------------------------- |
231 | | |
// net_CoalesceDirs: normalizes "." and ".." segments of |path| in place.
//
// |path| must be a writable, NUL-terminated string; the result is never
// longer than the input because characters are only copied backwards.
// Only the part before the first '?' or '#' is normalized; everything from
// that character on is copied through unchanged.
//
// |flags| bits read here:
//   NET_COALESCE_DOUBLE_SLASH_IS_ROOT - a leading "//" or "/%2F" is
//       remembered as a special root marker.
//   NET_COALESCE_ALLOW_RELATIVE_ROOT  - ".." segments that would climb above
//       the start of the path are kept instead of being dropped.
232 | | // Replace all /./ with a / while resolving URLs |
233 | | // But only till #? |
234 | | void |
235 | | net_CoalesceDirs(netCoalesceFlags flags, char* path) |
236 | 1.07M | { |
237 | 1.07M | /* Stolen from the old netlib's mkparse.c. |
238 | 1.07M | * |
239 | 1.07M | * modifies a url of the form /foo/../foo1 -> /foo1 |
240 | 1.07M | * and /foo/./foo1 -> /foo/foo1 |
241 | 1.07M | * and /foo/foo1/.. -> /foo/ |
242 | 1.07M | */ |
// fwdPtr is the read cursor and urlPtr the write cursor; both walk |path|.
// traversal counts the directory levels copied so far, i.e. how many ".."
// segments may still be resolved.
243 | 1.07M | char *fwdPtr = path; |
244 | 1.07M | char *urlPtr = path; |
245 | 1.07M | char *lastslash = path; |
246 | 1.07M | uint32_t traversal = 0; |
247 | 1.07M | uint32_t special_ftp_len = 0; |
248 | 1.07M | |
249 | 1.07M | /* Remember if this url is a special ftp one: */ |
250 | 1.07M | if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT) |
251 | 1.35k | { |
252 | 1.35k | /* some schemes (for example ftp) have the speciality that |
253 | 1.35k | the path can begin // or /%2F to mark the root of the |
254 | 1.35k | servers filesystem, a simple / only marks the root relative |
255 | 1.35k | to the user logging in. We remember the length of the marker */ |
256 | 1.35k | if (nsCRT::strncasecmp(path,"/%2F",4) == 0) |
257 | 137 | special_ftp_len = 4; |
258 | 1.21k | else if (strncmp(path, "//", 2) == 0 ) |
259 | 891 | special_ftp_len = 2; |
260 | 1.35k | } |
261 | 1.07M | |
262 | 1.07M | /* find the last slash before # or ? */ |
263 | 10.8M | for(; (*fwdPtr != '\0') && |
264 | 10.8M | (*fwdPtr != '?') && |
265 | 10.8M | (*fwdPtr != '#'); ++fwdPtr) |
266 | 9.73M | { |
267 | 9.73M | } |
268 | 1.07M | |
269 | 1.07M | /* found nothing, but go back one only */ |
270 | 1.07M | /* if there is something to go back to */ |
271 | 1.07M | if (fwdPtr != path && *fwdPtr == '\0') |
272 | 948k | { |
273 | 948k | --fwdPtr; |
274 | 948k | } |
275 | 1.07M | |
276 | 1.07M | /* search the slash */ |
277 | 7.31M | for(; (fwdPtr != path) && |
278 | 7.31M | (*fwdPtr != '/'); --fwdPtr) |
279 | 6.24M | { |
280 | 6.24M | } |
281 | 1.07M | lastslash = fwdPtr; |
282 | 1.07M | fwdPtr = path; |
283 | 1.07M | |
// First pass: un-escape "%2E"/"%2e" to "." so the second pass recognizes
// escaped dot segments. The loop stops once fwdPtr reaches lastslash (unless
// lastslash points at the terminating NUL), so escapes at or after the last
// slash are not rewritten.
284 | 1.07M | /* replace all %2E or %2e with . in the path */ |
285 | 1.07M | /* but stop at lastchar if non null */ |
286 | 3.60M | for(; (*fwdPtr != '\0') && |
287 | 3.60M | (*fwdPtr != '?') && |
288 | 3.60M | (*fwdPtr != '#') && |
289 | 3.60M | (*lastslash == '\0' || fwdPtr != lastslash); ++fwdPtr) |
290 | 2.53M | { |
291 | 2.53M | if (*fwdPtr == '%' && *(fwdPtr+1) == '2' && |
292 | 2.53M | (*(fwdPtr+2) == 'E' || *(fwdPtr+2) == 'e')) |
293 | 414 | { |
294 | 414 | *urlPtr++ = '.'; |
295 | 414 | ++fwdPtr; |
296 | 414 | ++fwdPtr; |
297 | 414 | } |
298 | 2.53M | else |
299 | 2.53M | { |
300 | 2.53M | *urlPtr++ = *fwdPtr; |
301 | 2.53M | } |
302 | 2.53M | } |
303 | 1.07M | // Copy remaining stuff past the #?; |
304 | 8.86M | for (; *fwdPtr != '\0'; ++fwdPtr) |
305 | 7.79M | { |
306 | 7.79M | *urlPtr++ = *fwdPtr; |
307 | 7.79M | } |
308 | 1.07M | *urlPtr = '\0'; // terminate the url |
309 | 1.07M | |
// Second pass: walk the (now un-escaped) path again from the start and
// collapse "/./" and "/../" segments while copying.
310 | 1.07M | // start again, this time for real |
311 | 1.07M | fwdPtr = path; |
312 | 1.07M | urlPtr = path; |
313 | 1.07M | |
314 | 10.7M | for(; (*fwdPtr != '\0') && |
315 | 10.7M | (*fwdPtr != '?') && |
316 | 10.7M | (*fwdPtr != '#'); ++fwdPtr) |
317 | 9.72M | { |
318 | 9.72M | if (*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '/' ) |
319 | 830 | { |
320 | 830 | // remove . followed by slash |
321 | 830 | ++fwdPtr; |
322 | 830 | } |
323 | 9.72M | else if(*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '.' && |
324 | 9.72M | (*(fwdPtr+3) == '/' || |
325 | 4.09k | *(fwdPtr+3) == '\0' || // This will take care of |
326 | 4.09k | *(fwdPtr+3) == '?' || // something like foo/bar/..#sometag |
327 | 4.09k | *(fwdPtr+3) == '#')) |
328 | 3.13k | { |
329 | 3.13k | // remove foo/.. |
330 | 3.13k | // reverse the urlPtr to the previous slash if possible |
331 | 3.13k | // if url does not allow relative root then drop .. above root |
332 | 3.13k | // otherwise retain them in the path |
333 | 3.13k | if(traversal > 0 || !(flags & |
334 | 1.56k | NET_COALESCE_ALLOW_RELATIVE_ROOT)) |
335 | 2.39k | { |
336 | 2.39k | if (urlPtr != path) |
337 | 1.16k | urlPtr--; // we must be going back at least by one |
338 | 93.1k | for(;*urlPtr != '/' && urlPtr != path; urlPtr--) |
339 | 90.8k | ; // null body |
// NOTE(review): traversal is unsigned, so this wraps when it is already 0.
// That can only happen when NET_COALESCE_ALLOW_RELATIVE_ROOT is not set, in
// which case the condition above is true regardless of traversal's value.
340 | 2.39k | --traversal; // count back |
341 | 2.39k | // forward the fwdPtr past the ../ |
342 | 2.39k | fwdPtr += 2; |
343 | 2.39k | // if we have reached the beginning of the path |
344 | 2.39k | // while searching for the previous / and we remember |
345 | 2.39k | // that it is an url that begins with /%2F then |
346 | 2.39k | // advance urlPtr again by 3 chars because /%2F already |
347 | 2.39k | // marks the root of the path |
348 | 2.39k | if (urlPtr == path && special_ftp_len > 3) |
349 | 95 | { |
350 | 95 | ++urlPtr; |
351 | 95 | ++urlPtr; |
352 | 95 | ++urlPtr; |
353 | 95 | } |
354 | 2.39k | // special case if we have reached the end |
355 | 2.39k | // to preserve the last / |
356 | 2.39k | if (*fwdPtr == '.' && *(fwdPtr+1) == '\0') |
357 | 360 | ++urlPtr; |
358 | 2.39k | } |
359 | 741 | else |
360 | 741 | { |
361 | 741 | // there are too many /.. in this path, just copy them instead. |
362 | 741 | // forward the urlPtr past the /.. and copying it |
363 | 741 | |
364 | 741 | // However if we remember it is an url that starts with |
365 | 741 | // /%2F and urlPtr just points at the "F" of "/%2F" then do |
366 | 741 | // not overwrite it with the /, just copy .. and move forward |
367 | 741 | // urlPtr. |
368 | 741 | if (special_ftp_len > 3 && urlPtr == path+special_ftp_len-1) |
369 | 26 | ++urlPtr; |
370 | 715 | else |
371 | 715 | *urlPtr++ = *fwdPtr; |
372 | 741 | ++fwdPtr; |
373 | 741 | *urlPtr++ = *fwdPtr; |
374 | 741 | ++fwdPtr; |
375 | 741 | *urlPtr++ = *fwdPtr; |
376 | 741 | } |
377 | 3.13k | } |
378 | 9.72M | else |
379 | 9.72M | { |
380 | 9.72M | // count the hierarchy, but only if we have not reached |
381 | 9.72M | // the root of some special urls with a special root marker |
382 | 9.72M | if (*fwdPtr == '/' && *(fwdPtr+1) != '.' && |
383 | 9.72M | (special_ftp_len != 2 || *(fwdPtr+1) != '/')) |
384 | 1.11M | traversal++; |
385 | 9.72M | // copy the url incrementally |
386 | 9.72M | *urlPtr++ = *fwdPtr; |
387 | 9.72M | } |
388 | 9.72M | } |
389 | 1.07M | |
390 | 1.07M | /* |
391 | 1.07M | * Now lets remove trailing . case |
392 | 1.07M | * /foo/foo1/. -> /foo/foo1/ |
393 | 1.07M | */ |
394 | 1.07M | |
395 | 1.07M | if ((urlPtr > (path+1)) && (*(urlPtr-1) == '.') && (*(urlPtr-2) == '/')) |
396 | 585 | urlPtr--; |
397 | 1.07M | |
398 | 1.07M | // Copy remaining stuff past the #?; |
399 | 1.66M | for (; *fwdPtr != '\0'; ++fwdPtr) |
400 | 595k | { |
401 | 595k | *urlPtr++ = *fwdPtr; |
402 | 595k | } |
403 | 1.07M | *urlPtr = '\0'; // terminate the url |
404 | 1.07M | } |
405 | | |
406 | | nsresult |
407 | | net_ResolveRelativePath(const nsACString &relativePath, |
408 | | const nsACString &basePath, |
409 | | nsACString &result) |
410 | 0 | { |
411 | 0 | nsAutoCString name; |
412 | 0 | nsAutoCString path(basePath); |
413 | 0 | bool needsDelim = false; |
414 | 0 |
|
415 | 0 | if ( !path.IsEmpty() ) { |
416 | 0 | char16_t last = path.Last(); |
417 | 0 | needsDelim = !(last == '/'); |
418 | 0 | } |
419 | 0 |
|
420 | 0 | nsACString::const_iterator beg, end; |
421 | 0 | relativePath.BeginReading(beg); |
422 | 0 | relativePath.EndReading(end); |
423 | 0 |
|
424 | 0 | bool stop = false; |
425 | 0 | char c; |
426 | 0 | for (; !stop; ++beg) { |
427 | 0 | c = (beg == end) ? '\0' : *beg; |
428 | 0 | //printf("%c [name=%s] [path=%s]\n", c, name.get(), path.get()); |
429 | 0 | switch (c) { |
430 | 0 | case '\0': |
431 | 0 | case '#': |
432 | 0 | case '?': |
433 | 0 | stop = true; |
434 | 0 | MOZ_FALLTHROUGH; |
435 | 0 | case '/': |
436 | 0 | // delimiter found |
437 | 0 | if (name.EqualsLiteral("..")) { |
438 | 0 | // pop path |
439 | 0 | // If we already have the delim at end, then |
440 | 0 | // skip over that when searching for next one to the left |
441 | 0 | int32_t offset = path.Length() - (needsDelim ? 1 : 2); |
442 | 0 | // First check for errors |
443 | 0 | if (offset < 0 ) |
444 | 0 | return NS_ERROR_MALFORMED_URI; |
445 | 0 | int32_t pos = path.RFind("/", false, offset); |
446 | 0 | if (pos >= 0) |
447 | 0 | path.Truncate(pos + 1); |
448 | 0 | else |
449 | 0 | path.Truncate(); |
450 | 0 | } |
451 | 0 | else if (name.IsEmpty() || name.EqualsLiteral(".")) { |
452 | 0 | // do nothing |
453 | 0 | } |
454 | 0 | else { |
455 | 0 | // append name to path |
456 | 0 | if (needsDelim) |
457 | 0 | path += '/'; |
458 | 0 | path += name; |
459 | 0 | needsDelim = true; |
460 | 0 | } |
461 | 0 | name.Truncate(); |
462 | 0 | break; |
463 | 0 |
|
464 | 0 | default: |
465 | 0 | // append char to name |
466 | 0 | name += c; |
467 | 0 | } |
468 | 0 | } |
469 | 0 | // append anything left on relativePath (e.g. #..., ;..., ?...) |
470 | 0 | if (c != '\0') |
471 | 0 | path += Substring(--beg, end); |
472 | 0 |
|
473 | 0 | result = path; |
474 | 0 | return NS_OK; |
475 | 0 | } |
476 | | |
477 | | //---------------------------------------------------------------------------- |
478 | | // scheme fu |
479 | | //---------------------------------------------------------------------------- |
480 | | |
481 | | static bool |
482 | | net_IsValidSchemeChar(const char aChar) |
483 | 18.9M | { |
484 | 18.9M | if (IsAsciiAlpha(aChar) || IsAsciiDigit(aChar) || |
485 | 18.9M | aChar == '+' || aChar == '.' || aChar == '-') { |
486 | 14.9M | return true; |
487 | 14.9M | } |
488 | 3.99M | return false; |
489 | 3.99M | } |
490 | | |
491 | | /* Extract URI-Scheme if possible */ |
492 | | nsresult |
493 | | net_ExtractURLScheme(const nsACString &inURI, |
494 | | nsACString& scheme) |
495 | 4.97M | { |
496 | 4.97M | nsACString::const_iterator start, end; |
497 | 4.97M | inURI.BeginReading(start); |
498 | 4.97M | inURI.EndReading(end); |
499 | 4.97M | |
500 | 4.97M | // Strip C0 and space from begining |
501 | 6.27M | while (start != end) { |
502 | 6.27M | if ((uint8_t) *start > 0x20) { |
503 | 4.97M | break; |
504 | 4.97M | } |
505 | 1.29M | start++; |
506 | 1.29M | } |
507 | 4.97M | |
508 | 4.97M | Tokenizer p(Substring(start, end), "\r\n\t"); |
509 | 4.97M | p.Record(); |
510 | 4.97M | if (!p.CheckChar(IsAsciiAlpha)) { |
511 | 756k | // First char must be alpha |
512 | 756k | return NS_ERROR_MALFORMED_URI; |
513 | 756k | } |
514 | 4.22M | |
515 | 17.3M | while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) { |
516 | 13.1M | // Skip valid scheme characters or \r\n\t |
517 | 13.1M | } |
518 | 4.22M | |
519 | 4.22M | if (!p.CheckChar(':')) { |
520 | 306k | return NS_ERROR_MALFORMED_URI; |
521 | 306k | } |
522 | 3.91M | |
523 | 3.91M | p.Claim(scheme); |
524 | 3.91M | scheme.StripTaggedASCII(ASCIIMask::MaskCRLFTab()); |
525 | 3.91M | ToLowerCase(scheme); |
526 | 3.91M | return NS_OK; |
527 | 3.91M | } |
528 | | |
529 | | bool |
530 | | net_IsValidScheme(const char *scheme, uint32_t schemeLen) |
531 | 2.80M | { |
532 | 2.80M | // first char must be alpha |
533 | 2.80M | if (!IsAsciiAlpha(*scheme)) |
534 | 2.12k | return false; |
535 | 2.80M | |
536 | 2.80M | // nsCStrings may have embedded nulls -- reject those too |
537 | 12.5M | for (; schemeLen; ++scheme, --schemeLen) { |
538 | 9.76M | if (!(IsAsciiAlpha(*scheme) || |
539 | 9.76M | IsAsciiDigit(*scheme) || |
540 | 9.76M | *scheme == '+' || |
541 | 9.76M | *scheme == '.' || |
542 | 9.76M | *scheme == '-')) |
543 | 2.22k | return false; |
544 | 9.76M | } |
545 | 2.80M | |
546 | 2.80M | return true; |
547 | 2.80M | } |
548 | | |
549 | | bool |
550 | | net_IsAbsoluteURL(const nsACString& uri) |
551 | 1.11M | { |
552 | 1.11M | nsACString::const_iterator start, end; |
553 | 1.11M | uri.BeginReading(start); |
554 | 1.11M | uri.EndReading(end); |
555 | 1.11M | |
556 | 1.11M | // Strip C0 and space from begining |
557 | 1.64M | while (start != end) { |
558 | 1.63M | if ((uint8_t) *start > 0x20) { |
559 | 1.11M | break; |
560 | 1.11M | } |
561 | 523k | start++; |
562 | 523k | } |
563 | 1.11M | |
564 | 1.11M | Tokenizer p(Substring(start, end), "\r\n\t"); |
565 | 1.11M | |
566 | 1.11M | // First char must be alpha |
567 | 1.11M | if (!p.CheckChar(IsAsciiAlpha)) { |
568 | 753k | return false; |
569 | 753k | } |
570 | 363k | |
571 | 2.16M | while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) { |
572 | 1.80M | // Skip valid scheme characters or \r\n\t |
573 | 1.80M | } |
574 | 363k | if (!p.CheckChar(':')) { |
575 | 306k | return false; |
576 | 306k | } |
577 | 57.5k | p.SkipWhites(); |
578 | 57.5k | |
579 | 57.5k | if (!p.CheckChar('/')) { |
580 | 35.7k | return false; |
581 | 35.7k | } |
582 | 21.8k | p.SkipWhites(); |
583 | 21.8k | |
584 | 21.8k | if (p.CheckChar('/')) { |
585 | 3.08k | // aSpec is really absolute. Ignore aBaseURI in this case |
586 | 3.08k | return true; |
587 | 3.08k | } |
588 | 18.7k | return false; |
589 | 18.7k | } |
590 | | |
591 | | void |
592 | | net_FilterURIString(const nsACString& input, nsACString& result) |
593 | 3.86M | { |
594 | 3.86M | result.Truncate(); |
595 | 3.86M | |
596 | 3.86M | auto start = input.BeginReading(); |
597 | 3.86M | auto end = input.EndReading(); |
598 | 3.86M | |
599 | 3.86M | // Trim off leading and trailing invalid chars. |
600 | 9.98M | auto charFilter = [](char c) { return static_cast<uint8_t>(c) > 0x20; }; |
601 | 3.86M | auto newStart = std::find_if(start, end, charFilter); |
602 | 3.86M | auto newEnd = std::find_if( |
603 | 3.86M | std::reverse_iterator<decltype(end)>(end), |
604 | 3.86M | std::reverse_iterator<decltype(newStart)>(newStart), |
605 | 3.86M | charFilter).base(); |
606 | 3.86M | |
607 | 3.86M | // Check if chars need to be stripped. |
608 | 3.86M | bool needsStrip = false; |
609 | 3.86M | const ASCIIMaskArray& mask = ASCIIMask::MaskCRLFTab(); |
610 | 60.2M | for (auto itr = start; itr != end; ++itr) { |
611 | 56.3M | if (ASCIIMask::IsMasked(mask, *itr)) { |
612 | 0 | needsStrip = true; |
613 | 0 | break; |
614 | 0 | } |
615 | 56.3M | } |
616 | 3.86M | |
617 | 3.86M | // Just use the passed in string rather than creating new copies if no |
618 | 3.86M | // changes are necessary. |
619 | 3.86M | if (newStart == start && newEnd == end && !needsStrip) { |
620 | 3.84M | result = input; |
621 | 3.84M | return; |
622 | 3.84M | } |
623 | 16.9k | |
624 | 16.9k | result.Assign(Substring(newStart, newEnd)); |
625 | 16.9k | if (needsStrip) { |
626 | 0 | result.StripTaggedASCII(mask); |
627 | 0 | } |
628 | 16.9k | } |
629 | | |
630 | | nsresult |
631 | | net_FilterAndEscapeURI(const nsACString& aInput, uint32_t aFlags, nsACString& aResult) |
632 | 1.92M | { |
633 | 1.92M | aResult.Truncate(); |
634 | 1.92M | |
635 | 1.92M | auto start = aInput.BeginReading(); |
636 | 1.92M | auto end = aInput.EndReading(); |
637 | 1.92M | |
638 | 1.92M | // Trim off leading and trailing invalid chars. |
639 | 4.84M | auto charFilter = [](char c) { return static_cast<uint8_t>(c) > 0x20; }; |
640 | 1.92M | auto newStart = std::find_if(start, end, charFilter); |
641 | 1.92M | auto newEnd = std::find_if( |
642 | 1.92M | std::reverse_iterator<decltype(end)>(end), |
643 | 1.92M | std::reverse_iterator<decltype(newStart)>(newStart), |
644 | 1.92M | charFilter).base(); |
645 | 1.92M | |
646 | 1.92M | const ASCIIMaskArray& mask = ASCIIMask::MaskCRLFTab(); |
647 | 1.92M | return NS_EscapeAndFilterURL(Substring(newStart, newEnd), aFlags, |
648 | 1.92M | &mask, aResult, fallible); |
649 | 1.92M | } |
650 | | |
651 | | #if defined(XP_WIN) |
652 | | bool |
653 | | net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf) |
654 | | { |
655 | | bool writing = false; |
656 | | |
657 | | nsACString::const_iterator beginIter, endIter; |
658 | | aURL.BeginReading(beginIter); |
659 | | aURL.EndReading(endIter); |
660 | | |
661 | | const char *s, *begin = beginIter.get(); |
662 | | |
663 | | for (s = begin; s != endIter.get(); ++s) |
664 | | { |
665 | | if (*s == '\\') |
666 | | { |
667 | | writing = true; |
668 | | if (s > begin) |
669 | | aResultBuf.Append(begin, s - begin); |
670 | | aResultBuf += '/'; |
671 | | begin = s + 1; |
672 | | } |
673 | | } |
674 | | if (writing && s > begin) |
675 | | aResultBuf.Append(begin, s - begin); |
676 | | |
677 | | return writing; |
678 | | } |
679 | | #endif |
680 | | |
681 | | //---------------------------------------------------------------------------- |
682 | | // miscellaneous (i.e., stuff that should really be elsewhere) |
683 | | //---------------------------------------------------------------------------- |
684 | | |
// Lower-cases |c| in place if it is an ASCII upper-case letter ('A'..'Z');
// every other value is left unchanged.
static inline
void ToLower(char &c)
{
    const bool isUpper = (c >= 'A' && c <= 'Z');
    if (isUpper) {
        c = static_cast<char>(c + ('a' - 'A'));
    }
}
691 | | |
692 | | void |
693 | | net_ToLowerCase(char *str, uint32_t length) |
694 | 2.17M | { |
695 | 18.6M | for (char *end = str + length; str < end; ++str) |
696 | 16.4M | ToLower(*str); |
697 | 2.17M | } |
698 | | |
699 | | void |
700 | | net_ToLowerCase(char *str) |
701 | 0 | { |
702 | 0 | for (; *str; ++str) |
703 | 0 | ToLower(*str); |
704 | 0 | } |
705 | | |
// Scans [iter, stop) for the first character that occurs in the
// NUL-terminated |set|. Scanning also ends at a NUL byte in the input.
//
// Returns a pointer to the matching character, or to where the scan ended
// (|stop| or the NUL byte) when there is no match.
char *
net_FindCharInSet(const char *iter, const char *stop, const char *set)
{
    const char *cursor = iter;
    while (cursor != stop && *cursor != '\0') {
        const char *candidate = set;
        while (*candidate != '\0' && *candidate != *cursor) {
            ++candidate;
        }
        if (*candidate != '\0') {
            break;  // *cursor is a member of the set
        }
        ++cursor;
    }
    return const_cast<char *>(cursor);
}
717 | | |
// Scans [iter, stop) for the first character that does NOT occur in the
// NUL-terminated |set|.
//
// Returns a pointer to that character, or |stop| when every character of the
// range is in the set. A NUL byte in the input is never a member of the set,
// so the scan also ends there.
//
// An empty range (iter == stop) returns |stop| without reading from it. The
// old code dereferenced |stop| and could run past it when that byte was in
// the set.
char *
net_FindCharNotInSet(const char *iter, const char *stop, const char *set)
{
    while (iter != stop) {
        bool inSet = false;
        for (const char *s = set; *s; ++s) {
            if (*iter == *s) {
                inSet = true;
                break;
            }
        }
        if (!inSet) {
            break;
        }
        ++iter;
    }
    return const_cast<char *>(iter);
}
731 | | |
// Backward counterpart of net_FindCharNotInSet(): scans the range
// [stop, iter) from its last character towards its first for a character
// that does NOT occur in the NUL-terminated |set|.
//
// Returns a pointer to that character, or |stop| - 1 when the range is empty
// or consists only of characters from the set.
char *
net_RFindCharNotInSet(const char *stop, const char *iter, const char *set)
{
    const char *cursor = iter - 1;
    const char *const limit = stop - 1;

    while (cursor != limit) {
        bool inSet = false;
        for (const char *s = set; *s; ++s) {
            if (*cursor == *s) {
                inSet = true;
                break;
            }
        }
        if (!inSet) {
            break;
        }
        --cursor;
    }
    return const_cast<char *>(cursor);
}
751 | | |
// Characters treated as linear whitespace by the media-type parsing helpers
// below: space and horizontal tab.
752 | 0 | #define HTTP_LWS " \t" |
753 | | |
754 | | // Return the index of the closing quote of the string, if any |
755 | | static uint32_t |
756 | | net_FindStringEnd(const nsCString& flatStr, |
757 | | uint32_t stringStart, |
758 | | char stringDelim) |
759 | 0 | { |
760 | 0 | NS_ASSERTION(stringStart < flatStr.Length() && |
761 | 0 | flatStr.CharAt(stringStart) == stringDelim && |
762 | 0 | (stringDelim == '"' || stringDelim == '\''), |
763 | 0 | "Invalid stringStart"); |
764 | 0 |
|
765 | 0 | const char set[] = { stringDelim, '\\', '\0' }; |
766 | 0 | do { |
767 | 0 | // stringStart points to either the start quote or the last |
768 | 0 | // escaped char (the char following a '\\') |
769 | 0 |
|
770 | 0 | // Write to searchStart here, so that when we get back to the |
771 | 0 | // top of the loop right outside this one we search from the |
772 | 0 | // right place. |
773 | 0 | uint32_t stringEnd = flatStr.FindCharInSet(set, stringStart + 1); |
774 | 0 | if (stringEnd == uint32_t(kNotFound)) |
775 | 0 | return flatStr.Length(); |
776 | 0 | |
777 | 0 | if (flatStr.CharAt(stringEnd) == '\\') { |
778 | 0 | // Hit a backslash-escaped char. Need to skip over it. |
779 | 0 | stringStart = stringEnd + 1; |
780 | 0 | if (stringStart == flatStr.Length()) |
781 | 0 | return stringStart; |
782 | 0 | |
783 | 0 | // Go back to looking for the next escape or the string end |
784 | 0 | continue; |
785 | 0 | } |
786 | 0 | |
787 | 0 | return stringEnd; |
788 | 0 |
|
789 | 0 | } while (true); |
790 | 0 |
|
791 | 0 | MOZ_ASSERT_UNREACHABLE("How did we get here?"); |
792 | 0 | return flatStr.Length(); |
793 | 0 | } |
794 | | |
795 | | |
796 | | static uint32_t |
797 | | net_FindMediaDelimiter(const nsCString& flatStr, |
798 | | uint32_t searchStart, |
799 | | char delimiter) |
800 | 0 | { |
801 | 0 | do { |
802 | 0 | // searchStart points to the spot from which we should start looking |
803 | 0 | // for the delimiter. |
804 | 0 | const char delimStr[] = { delimiter, '"', '\0' }; |
805 | 0 | uint32_t curDelimPos = flatStr.FindCharInSet(delimStr, searchStart); |
806 | 0 | if (curDelimPos == uint32_t(kNotFound)) |
807 | 0 | return flatStr.Length(); |
808 | 0 | |
809 | 0 | char ch = flatStr.CharAt(curDelimPos); |
810 | 0 | if (ch == delimiter) { |
811 | 0 | // Found delimiter |
812 | 0 | return curDelimPos; |
813 | 0 | } |
814 | 0 | |
815 | 0 | // We hit the start of a quoted string. Look for its end. |
816 | 0 | searchStart = net_FindStringEnd(flatStr, curDelimPos, ch); |
817 | 0 | if (searchStart == flatStr.Length()) |
818 | 0 | return searchStart; |
819 | 0 | |
820 | 0 | ++searchStart; |
821 | 0 |
|
822 | 0 | // searchStart now points to the first char after the end of the |
823 | 0 | // string, so just go back to the top of the loop and look for |
824 | 0 | // |delimiter| again. |
825 | 0 | } while (true); |
826 | 0 |
|
827 | 0 | MOZ_ASSERT_UNREACHABLE("How did we get here?"); |
828 | 0 | return flatStr.Length(); |
829 | 0 | } |
830 | | |
// Parses one media-type (a single element of a Content-Type header value)
// and folds the result into the caller-supplied outputs.
//
// Parameters:
//   aMediaTypeStr   - text of the media-type to parse.
//   aContentType    - in/out. Replaced with the lower-cased parsed type when
//                     it differs (case-insensitively) from the current value.
//   aContentCharset - in/out. Overwritten when this media-type carries a
//                     charset parameter, or when the type changed and
//                     *aHadCharset was already true (it then becomes empty).
//   aOffset         - added to every position stored through aCharsetStart
//                     and aCharsetEnd; positions are otherwise indices into
//                     aMediaTypeStr.
//   aHadCharset     - in/out. Set to true whenever aContentCharset is written.
//   aCharsetStart,
//   aCharsetEnd     - out. With a charset parameter: the span recorded for
//                     that parameter. Without one, and only if the type
//                     changed: both are set to the index of the first
//                     semicolon (or the string length when there is none).
//   aStrict         - true: accept only if nothing but LWS follows the
//                     consumed text. false: accept unless the type matches
//                     the wildcard literal in the strncmp below.
//
// Nothing is written when the type is empty, lacks a slash, or fails the
// aStrict / wildcard test.
//
831 | | // aOffset should be added to aCharsetStart and aCharsetEnd if this
832 | | // function sets them.
833 | | static void
834 | | net_ParseMediaType(const nsACString &aMediaTypeStr,
835 | | nsACString &aContentType,
836 | | nsACString &aContentCharset,
837 | | int32_t aOffset,
838 | | bool *aHadCharset,
839 | | int32_t *aCharsetStart,
840 | | int32_t *aCharsetEnd,
841 | | bool aStrict)
842 | 0 | {
843 | 0 | const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr);
844 | 0 | const char* start = flatStr.get();
845 | 0 | const char* end = start + flatStr.Length();
846 | 0 |

847 | 0 | // Trim LWS leading and trailing whitespace from type. We include '(' in
848 | 0 | // the trailing trim set to catch media-type comments, which are not at all
849 | 0 | // standard, but may occur in rare cases.
850 | 0 | const char* type = net_FindCharNotInSet(start, end, HTTP_LWS);
851 | 0 | const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";(");
852 | 0 |

// Defaults describe an empty charset; they are replaced only if a charset=
// parameter is found in the loop below.
853 | 0 | const char* charset = "";
854 | 0 | const char* charsetEnd = charset;
855 | 0 | int32_t charsetParamStart = 0;
856 | 0 | int32_t charsetParamEnd = 0;
857 | 0 |

// NOTE(review): consumed is measured from |type| here but is later used as an
// offset from |start| in the aStrict check. The two differ when the input has
// leading LWS and no parameters - confirm whether that is intended.
858 | 0 | uint32_t consumed = typeEnd - type;
859 | 0 |

860 | 0 | // Iterate over parameters
861 | 0 | bool typeHasCharset = false;
// Search for the first semicolon at or after the end of the type.
862 | 0 | uint32_t paramStart = flatStr.FindChar(';', typeEnd - start);
863 | 0 | if (paramStart != uint32_t(kNotFound)) {
864 | 0 | // We have parameters. Iterate over them.
865 | 0 | uint32_t curParamStart = paramStart + 1;
866 | 0 | do {
867 | 0 | uint32_t curParamEnd =
868 | 0 | net_FindMediaDelimiter(flatStr, curParamStart, ';');
869 | 0 |

870 | 0 | const char* paramName = net_FindCharNotInSet(start + curParamStart,
871 | 0 | start + curParamEnd,
872 | 0 | HTTP_LWS);
// Case-insensitive prefix match. Every match overwrites the previous one,
// so the last charset= parameter wins.
873 | 0 | static const char charsetStr[] = "charset=";
874 | 0 | if (PL_strncasecmp(paramName, charsetStr,
875 | 0 | sizeof(charsetStr) - 1) == 0) {
876 | 0 | charset = paramName + sizeof(charsetStr) - 1;
877 | 0 | charsetEnd = start + curParamEnd;
878 | 0 | typeHasCharset = true;
// One position before the parameter text: the first semicolon on the first
// pass, or the point where the previous delimiter search stopped.
879 | 0 | charsetParamStart = curParamStart - 1;
880 | 0 | charsetParamEnd = curParamEnd;
881 | 0 | }
882 | 0 |

// Remember how far parsing got and step past the position returned by the
// delimiter search.
883 | 0 | consumed = curParamEnd;
884 | 0 | curParamStart = curParamEnd + 1;
885 | 0 | } while (curParamStart < flatStr.Length());
886 | 0 | }
887 | 0 |

888 | 0 | bool charsetNeedsQuotedStringUnescaping = false;
889 | 0 | if (typeHasCharset) {
890 | 0 | // Trim LWS leading and trailing whitespace from charset. We include
891 | 0 | // '(' in the trailing trim set to catch media-type comments, which are
892 | 0 | // not at all standard, but may occur in rare cases.
893 | 0 | charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS);
// Quoted value: the end comes from net_FindStringEnd (presumably the matching
// closing quote - TODO confirm against that helper) and the opening quote is
// then skipped.
894 | 0 | if (*charset == '"') {
895 | 0 | charsetNeedsQuotedStringUnescaping = true;
896 | 0 | charsetEnd =
897 | 0 | start + net_FindStringEnd(flatStr, charset - start, *charset);
898 | 0 | charset++;
899 | 0 | NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing");
900 | 0 | } else {
901 | 0 | charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";(");
902 | 0 | }
903 | 0 | }
904 | 0 |

905 | 0 | // if the server sent "*/*", it is meaningless, so do not store it.
906 | 0 | // also, if type is the same as aContentType, then just update the
907 | 0 | // charset. however, if charset is empty and aContentType hasn't
908 | 0 | // changed, then don't wipe-out an existing aContentCharset. We
909 | 0 | // also want to reject a mime-type if it does not include a slash.
910 | 0 | // some servers give junk after the charset parameter, which may
911 | 0 | // include a comma, so this check makes us a bit more tolerant.
912 | 0 |

913 | 0 | if (type != typeEnd &&
914 | 0 | memchr(type, '/', typeEnd - type) != nullptr &&
915 | 0 | (aStrict ? (net_FindCharNotInSet(start + consumed, end, HTTP_LWS) == end) :
916 | 0 | (strncmp(type, "*/*", typeEnd - type) != 0))) {
917 | 0 | // Common case here is that aContentType is empty
918 | 0 | bool eq = !aContentType.IsEmpty() &&
919 | 0 | aContentType.Equals(Substring(type, typeEnd),
920 | 0 | nsCaseInsensitiveCStringComparator());
921 | 0 | if (!eq) {
922 | 0 | aContentType.Assign(type, typeEnd - type);
923 | 0 | ToLowerCase(aContentType);
924 | 0 | }
925 | 0 |

// Write the charset when this media-type supplies one, or when the type
// changed and an earlier charset must be cleared.
926 | 0 | if ((!eq && *aHadCharset) || typeHasCharset) {
927 | 0 | *aHadCharset = true;
928 | 0 | if (charsetNeedsQuotedStringUnescaping) {
929 | 0 | // parameters using the "quoted-string" syntax need
930 | 0 | // backslash-escapes to be unescaped (see RFC 2616 Section 2.2)
931 | 0 | aContentCharset.Truncate();
932 | 0 | for (const char *c = charset; c != charsetEnd; c++) {
// A backslash that is the very last character is appended as-is.
933 | 0 | if (*c == '\\' && c + 1 != charsetEnd) {
934 | 0 | // eat escape
935 | 0 | c++;
936 | 0 | }
937 | 0 | aContentCharset.Append(*c);
938 | 0 | }
939 | 0 | }
940 | 0 | else {
941 | 0 | aContentCharset.Assign(charset, charsetEnd - charset);
942 | 0 | }
943 | 0 | if (typeHasCharset) {
944 | 0 | *aCharsetStart = charsetParamStart + aOffset;
945 | 0 | *aCharsetEnd = charsetParamEnd + aOffset;
946 | 0 | }
947 | 0 | }
948 | 0 | // Only set a new charset position if this is a different type
949 | 0 | // from the last one we had and it doesn't already have a
950 | 0 | // charset param. If this is the same type, we probably want
951 | 0 | // to leave the charset position on its first occurrence.
952 | 0 | if (!eq && !typeHasCharset) {
// paramStart still holds kNotFound (converted to uint32_t) when the
// media-type had no parameters; fall back to the end of the string.
953 | 0 | int32_t charsetStart = int32_t(paramStart);
954 | 0 | if (charsetStart == kNotFound)
955 | 0 | charsetStart = flatStr.Length();
956 | 0 |

957 | 0 | *aCharsetEnd = *aCharsetStart = charsetStart + aOffset;
958 | 0 | }
959 | 0 | }
960 | 0 | }
961 | | |
962 | | #undef HTTP_LWS |
963 | | |
// Convenience overload: forwards to the six-argument net_ParseContentType
// and discards the charset start/end positions it reports.
964 | | void
965 | | net_ParseContentType(const nsACString &aHeaderStr,
966 | | nsACString &aContentType,
967 | | nsACString &aContentCharset,
968 | | bool *aHadCharset)
969 | 0 | {
// Throwaway receivers for the charset positions; never read afterwards.
970 | 0 | int32_t dummy1, dummy2;
971 | 0 | net_ParseContentType(aHeaderStr, aContentType, aContentCharset,
972 | 0 | aHadCharset, &dummy1, &dummy2);
973 | 0 | }
974 | | |
// Parses a Content-Type header value that may hold several comma-separated
// media-types, feeding each one to net_ParseMediaType in non-strict mode.
//
//   aHeaderStr       - the header value to parse.
//   aContentType,
//   aContentCharset  - in/out. Not cleared here; each media-type is merged
//                      into whatever the caller passed in.
//   aHadCharset      - out. Reset to false on entry, then updated by the
//                      per-media-type parser.
//   aCharsetStart,
//   aCharsetEnd      - out. Never written directly by this function; they are
//                      only assigned inside net_ParseMediaType, with
//                      positions offset to be relative to aHeaderStr.
975 | | void
976 | | net_ParseContentType(const nsACString &aHeaderStr,
977 | | nsACString &aContentType,
978 | | nsACString &aContentCharset,
979 | | bool *aHadCharset,
980 | | int32_t *aCharsetStart,
981 | | int32_t *aCharsetEnd)
982 | 0 | {
983 | 0 | //
984 | 0 | // Augmented BNF (from RFC 2616 section 3.7):
985 | 0 | //
986 | 0 | // header-value = media-type *( LWS "," LWS media-type )
987 | 0 | // media-type = type "/" subtype *( LWS ";" LWS parameter )
988 | 0 | // type = token
989 | 0 | // subtype = token
990 | 0 | // parameter = attribute "=" value
991 | 0 | // attribute = token
992 | 0 | // value = token | quoted-string
993 | 0 | //
994 | 0 | //
995 | 0 | // Examples:
996 | 0 | //
997 | 0 | // text/html
998 | 0 | // text/html, text/html
999 | 0 | // text/html,text/html; charset=ISO-8859-1
1000 | 0 | // text/html,text/html; charset="ISO-8859-1"
1001 | 0 | // text/html;charset=ISO-8859-1, text/html
1002 | 0 | // text/html;charset='ISO-8859-1', text/html
1003 | 0 | // application/octet-stream
1004 | 0 | //
1005 | 0 |

1006 | 0 | *aHadCharset = false;
1007 | 0 | const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
1008 | 0 |

1009 | 0 | // iterate over media-types. Note that ',' characters can happen
1010 | 0 | // inside quoted strings, so we need to watch out for that.
// The do/while form means the parser runs once even for an empty header.
1011 | 0 | uint32_t curTypeStart = 0;
1012 | 0 | do {
1013 | 0 | // curTypeStart points to the start of the current media-type. We want
1014 | 0 | // to look for its end.
1015 | 0 | uint32_t curTypeEnd =
1016 | 0 | net_FindMediaDelimiter(flatStr, curTypeStart, ',');
1017 | 0 |

1018 | 0 | // At this point curTypeEnd points to the spot where the media-type
1019 | 0 | // starting at curTypeStart ends. Time to parse that!
// curTypeStart is also passed as the offset so reported charset positions
// are relative to the whole header rather than to this substring.
1020 | 0 | net_ParseMediaType(Substring(flatStr, curTypeStart,
1021 | 0 | curTypeEnd - curTypeStart),
1022 | 0 | aContentType, aContentCharset, curTypeStart,
1023 | 0 | aHadCharset, aCharsetStart, aCharsetEnd, false);
1024 | 0 |

1025 | 0 | // And let's move on to the next media-type
1026 | 0 | curTypeStart = curTypeEnd + 1;
1027 | 0 | } while (curTypeStart < flatStr.Length());
1028 | 0 | }
1029 | | |
// Parses a request Content-Type header, which must contain exactly one
// media-type, using the strict mode of net_ParseMediaType.
//
//   aHeaderStr       - the header value to parse.
//   aContentType,
//   aContentCharset  - out. Truncated on entry; filled only from a
//                      successfully parsed media-type.
//   aHadCharset      - out. Set to false on entry; receives the flag
//                      produced by the media-type parser.
//
// If the delimiter search stops before the end of the string, the function
// returns early and the outputs stay empty / false.
1030 | | void
1031 | | net_ParseRequestContentType(const nsACString &aHeaderStr,
1032 | | nsACString &aContentType,
1033 | | nsACString &aContentCharset,
1034 | | bool *aHadCharset)
1035 | 0 | {
1036 | 0 | //
1037 | 0 | // Augmented BNF (from RFC 7231 section 3.1.1.1):
1038 | 0 | //
1039 | 0 | // media-type = type "/" subtype *( OWS ";" OWS parameter )
1040 | 0 | // type = token
1041 | 0 | // subtype = token
1042 | 0 | // parameter = token "=" ( token / quoted-string )
1043 | 0 | //
1044 | 0 | // Examples:
1045 | 0 | //
1046 | 0 | // text/html
1047 | 0 | // text/html; charset=ISO-8859-1
1048 | 0 | // text/html; charset="ISO-8859-1"
1049 | 0 | // application/octet-stream
1050 | 0 | //
1051 | 0 |

1052 | 0 | aContentType.Truncate();
1053 | 0 | aContentCharset.Truncate();
1054 | 0 | *aHadCharset = false;
1055 | 0 | const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
1056 | 0 |

1057 | 0 | // Parse into locals so the out-params stay truncated if the header is
1058 | 0 | // rejected by the delimiter check below.
1059 | 0 | nsAutoCString contentType, contentCharset;
1060 | 0 | bool hadCharset = false;
1061 | 0 | int32_t dummy1, dummy2;
// Presumably net_FindMediaDelimiter returns the string length when no comma
// delimiter exists - TODO confirm against that helper.
1062 | 0 | uint32_t typeEnd = net_FindMediaDelimiter(flatStr, 0, ',');
1063 | 0 | if (typeEnd != flatStr.Length()) {
1064 | 0 | // We have some stuff left at the end, so this is not a valid
1065 | 0 | // request Content-Type header.
1066 | 0 | return;
1067 | 0 | }
1068 | 0 | net_ParseMediaType(flatStr, contentType, contentCharset, 0,
1069 | 0 | &hadCharset, &dummy1, &dummy2, true);
1070 | 0 |

1071 | 0 | aContentType = contentType;
1072 | 0 | aContentCharset = contentCharset;
1073 | 0 | *aHadCharset = hadCharset;
1074 | 0 | }
1075 | | |
// Decides whether |host| may be handed to lower-level name resolution.
//
// Returns true when every character of |host| belongs to the whitelist
// below. Otherwise returns whether PR_StringToNetAddr can parse |host| as a
// network address.
1076 | | bool
1077 | | net_IsValidHostName(const nsACString& host)
1078 | 0 | {
1079 | 0 | const char *end = host.EndReading();
1080 | 0 | // Use explicit whitelists to select which characters we are
1081 | 0 | // willing to send to lower-level DNS logic. This is more
1082 | 0 | // self-documenting, and can also be slightly faster than the
1083 | 0 | // blacklist approach, since DNS names are the common case, and
1084 | 0 | // the commonest characters will tend to be near the start of
1085 | 0 | // the list.
1086 | 0 |

1087 | 0 | // Whitelist for DNS names (RFC 1035) with extra characters added
1088 | 0 | // for pragmatic reasons "$+_"
1089 | 0 | // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2
// Reaching |end| means no character outside the whitelist was found.
1090 | 0 | if (net_FindCharNotInSet(host.BeginReading(), end,
1091 | 0 | "abcdefghijklmnopqrstuvwxyz"
1092 | 0 | ".-0123456789"
1093 | 0 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_") == end)
1094 | 0 | return true;
1095 | 0 | 
1096 | 0 | // Might be a valid IPv6 link-local address containing a percent sign
// The host is copied into a local nsAutoCString before .get() is passed to
// the NSPR parser.
1097 | 0 | nsAutoCString strhost(host);
1098 | 0 | PRNetAddr addr;
1099 | 0 | return PR_StringToNetAddr(strhost.get(), &addr) == PR_SUCCESS;
1100 | 0 | }
1101 | | |
// Checks whether the first |addrLen| characters at |addr| form a dotted-quad
// IPv4 address.
//
// Accepted form: exactly four decimal octets separated by three dots. Each
// octet must be non-empty, at most 255, and must not have a leading zero
// (a zero octet has to be the single digit 0). Any other character makes
// the address invalid.
//
// NOTE(review): the loop runs while addrLen is non-zero, so a negative
// addrLen is not guarded against here - callers presumably always pass a
// non-negative length; confirm.
1102 | | bool
1103 | | net_IsValidIPv4Addr(const char *addr, int32_t addrLen)
1104 | 2.68k | {
1105 | 2.68k | RangedPtr<const char> p(addr, addrLen);
1106 | 2.68k | 
1107 | 2.68k | int32_t octet = -1; // means no digit yet
1108 | 2.68k | int32_t dotCount = 0; // number of dots in the address
1109 | 2.68k | 
1110 | 18.2k | for (; addrLen; ++p, --addrLen) {
1111 | 17.2k | if (*p == '.') {
1112 | 4.76k | dotCount++;
// A dot with no digits before it means an empty octet.
1113 | 4.76k | if (octet == -1) {
1114 | 343 | // invalid octet
1115 | 343 | return false;
1116 | 343 | }
1117 | 4.41k | octet = -1;
1118 | 12.5k | } else if (*p >= '0' && *p <='9') {
// The octet is already exactly zero and another digit follows.
1119 | 12.0k | if (octet == 0) {
1120 | 80 | // leading 0 is not allowed
1121 | 80 | return false;
1122 | 80 | }
1123 | 11.9k | if (octet == -1) {
1124 | 6.61k | octet = *p - '0';
1125 | 6.61k | } else {
1126 | 5.32k | octet *= 10;
1127 | 5.32k | octet += *p - '0';
// Checked after every digit, so the running value never grows past four
// digits before being rejected.
1128 | 5.32k | if (octet > 255)
1129 | 819 | return false;
1130 | 513 | }
1131 | 513 | } else {
1132 | 513 | // invalid character
1133 | 513 | return false;
1134 | 513 | }
1135 | 17.2k | }
1136 | 2.68k | 
// Require exactly three dots and a non-empty final octet.
1137 | 2.68k | return (dotCount == 3 && octet != -1);
1138 | 2.68k | }
1139 | | |
// Checks whether the first |addrLen| characters at |addr| form a textual
// IPv6 address.
//
// Rules enforced by the code below:
//   - blocks are one to four hexadecimal digits, separated by colons;
//   - at most one double colon, and never three colons in a row;
//   - the address may not begin or end with a single colon;
//   - without a double colon there must be exactly 8 blocks, with one there
//     must be fewer than 8;
//   - a dotted-quad IPv4 tail is allowed; it is validated by
//     net_IsValidIPv4Addr and the block limits become 6 instead of 8;
//   - any other character (for example a bracket or a percent sign) makes
//     the address invalid.
1140 | | bool
1141 | | net_IsValidIPv6Addr(const char *addr, int32_t addrLen)
1142 | 6.14k | {
1143 | 6.14k | RangedPtr<const char> p(addr, addrLen);
1144 | 6.14k | 
1145 | 6.14k | int32_t digits = 0; // number of digits in current block
1146 | 6.14k | int32_t colons = 0; // number of colons in a row during parsing
1147 | 6.14k | int32_t blocks = 0; // number of hexadecimal blocks
1148 | 6.14k | bool haveZeros = false; // true if double colon is present in the address
1149 | 6.14k | 
1150 | 26.1k | for (; addrLen; ++p, --addrLen) {
1151 | 23.7k | if (*p == ':') {
1152 | 5.70k | if (colons == 0) {
// First colon after a run of digits closes the current block.
1153 | 3.57k | if (digits != 0) {
1154 | 2.22k | digits = 0;
1155 | 2.22k | blocks++;
1156 | 2.22k | }
1157 | 3.57k | } else if (colons == 1) {
1158 | 2.06k | if (haveZeros)
1159 | 68 | return false; // only one occurrence is allowed
1160 | 1.99k | haveZeros = true;
1161 | 1.99k | } else {
1162 | 68 | // too many colons in a row
1163 | 68 | return false;
1164 | 68 | }
1165 | 5.57k | colons++;
1166 | 18.0k | } else if ((*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') ||
1167 | 18.0k | (*p >= 'A' && *p <= 'F')) {
// Exactly one colon seen and no block completed yet.
1168 | 14.7k | if (colons == 1 && blocks == 0) // starts with a single colon
1169 | 71 | return false;
1170 | 14.6k | if (digits == 4) // too many digits
1171 | 268 | return false;
1172 | 14.3k | colons = 0;
1173 | 14.3k | digits++;
1174 | 14.3k | } else if (*p == '.') {
1175 | 2.68k | // check valid IPv4 from the beginning of the last block
// Rewind by |digits| so the characters already consumed as a hex block are
// re-validated as the first octet; the length is extended to match.
1176 | 2.68k | if (!net_IsValidIPv4Addr(p.get() - digits, addrLen + digits))
1177 | 2.10k | return false;
// The IPv4 tail stands in for the last two blocks, hence 6 rather than 8.
1178 | 589 | return (haveZeros && blocks < 6) || (!haveZeros && blocks == 6);
1179 | 637 | } else {
1180 | 637 | // invalid character
1181 | 637 | return false;
1182 | 637 | }
1183 | 23.7k | }
1184 | 6.14k | 
1185 | 6.14k | if (colons == 1) // ends with a single colon
1186 | 233 | return false;
1187 | 2.11k | 
1188 | 2.11k | if (digits) // there is a block at the end
1189 | 798 | blocks++;
1190 | 2.11k | 
1191 | 2.11k | return (haveZeros && blocks < 8) || (!haveZeros && blocks == 8);
1192 | 2.11k | }