/src/mozilla-central/netwerk/mime/nsMIMEHeaderParamImpl.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* vim: set sw=4 ts=8 et tw=80 : */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | #include <string.h> |
8 | | #include "prprf.h" |
9 | | #include "plstr.h" |
10 | | #include "plbase64.h" |
11 | | #include "nsCRT.h" |
12 | | #include "nsMemory.h" |
13 | | #include "nsTArray.h" |
14 | | #include "nsCOMPtr.h" |
15 | | #include "nsEscape.h" |
16 | | #include "nsIServiceManager.h" |
17 | | #include "nsMIMEHeaderParamImpl.h" |
18 | | #include "nsReadableUtils.h" |
19 | | #include "nsNativeCharsetUtils.h" |
20 | | #include "nsError.h" |
21 | | #include "mozilla/Encoding.h" |
22 | | |
23 | | using mozilla::Encoding; |
24 | | |
25 | | // static functions declared below are moved from mailnews/mime/src/comi18n.cpp |
26 | | |
27 | | static char *DecodeQ(const char *, uint32_t); |
28 | | static bool Is7bitNonAsciiString(const char *, uint32_t); |
29 | | static void CopyRawHeader(const char *, uint32_t, const nsACString&, nsACString &); |
30 | | static nsresult DecodeRFC2047Str(const char *, const nsACString&, bool, nsACString&); |
31 | | static nsresult internalDecodeParameter(const nsACString&, const nsACString&, |
32 | | const nsACString&, bool, bool, nsACString&); |
33 | | |
34 | | static nsresult |
35 | | ToUTF8(const nsACString& aString, |
36 | | const nsACString& aCharset, |
37 | | bool aAllowSubstitution, |
38 | | nsACString& aResult) |
39 | 0 | { |
40 | 0 | if (aCharset.IsEmpty()) { |
41 | 0 | return NS_ERROR_INVALID_ARG; |
42 | 0 | } |
43 | 0 | |
44 | 0 | auto encoding = Encoding::ForLabelNoReplacement(aCharset); |
45 | 0 | if (!encoding) { |
46 | 0 | return NS_ERROR_UCONV_NOCONV; |
47 | 0 | } |
48 | 0 | if (aAllowSubstitution) { |
49 | 0 | nsresult rv = encoding->DecodeWithoutBOMHandling(aString, aResult); |
50 | 0 | if (NS_SUCCEEDED(rv)) { |
51 | 0 | return NS_OK; |
52 | 0 | } |
53 | 0 | return rv; |
54 | 0 | } |
55 | 0 | return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aString, |
56 | 0 | aResult); |
57 | 0 | } |
58 | | |
59 | | static nsresult |
60 | | ConvertStringToUTF8(const nsACString& aString, |
61 | | const nsACString& aCharset, |
62 | | bool aSkipCheck, |
63 | | bool aAllowSubstitution, |
64 | | nsACString& aUTF8String) |
65 | 0 | { |
66 | 0 | // return if ASCII only or valid UTF-8 providing that the ASCII/UTF-8 |
67 | 0 | // check is requested. It may not be asked for if a caller suspects |
68 | 0 | // that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or |
69 | 0 | // it's in a charset other than UTF-8 that can be mistaken for UTF-8. |
70 | 0 | if (!aSkipCheck && (IsASCII(aString) || IsUTF8(aString))) { |
71 | 0 | aUTF8String = aString; |
72 | 0 | return NS_OK; |
73 | 0 | } |
74 | 0 | |
75 | 0 | aUTF8String.Truncate(); |
76 | 0 |
|
77 | 0 | nsresult rv = ToUTF8(aString, aCharset, aAllowSubstitution, aUTF8String); |
78 | 0 |
|
79 | 0 | // additional protection for cases where check is skipped and the input |
80 | 0 | // is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch |
81 | 0 | // was wrong.) We don't check ASCIIness assuming there's no charset |
82 | 0 | // incompatible with ASCII (we don't support EBCDIC). |
83 | 0 | if (aSkipCheck && NS_FAILED(rv) && IsUTF8(aString)) { |
84 | 0 | aUTF8String = aString; |
85 | 0 | return NS_OK; |
86 | 0 | } |
87 | 0 | |
88 | 0 | return rv; |
89 | 0 | } |
90 | | |
// Evaluates to true when the charset name |cset| starts (case-insensitively)
// with "ISO-2022", "HZ-GB" or "UTF-7".
// XXX UTF-7 is very unlikely to show up in a message header, but it is
// possible in theory.
#define IS_7BIT_NON_ASCII_CHARSET(cset)            \
    (nsCRT::strncasecmp((cset), "ISO-2022", 8) == 0 || \
     nsCRT::strncasecmp((cset), "HZ-GB", 5) == 0 || \
     nsCRT::strncasecmp((cset), "UTF-7", 5) == 0)
97 | | |
98 | | NS_IMPL_ISUPPORTS(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam) |
99 | | |
100 | | NS_IMETHODIMP |
101 | | nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal, |
102 | | const char *aParamName, |
103 | | const nsACString& aFallbackCharset, |
104 | | bool aTryLocaleCharset, |
105 | | char **aLang, nsAString& aResult) |
106 | 0 | { |
107 | 0 | return DoGetParameter(aHeaderVal, aParamName, MIME_FIELD_ENCODING, |
108 | 0 | aFallbackCharset, aTryLocaleCharset, aLang, aResult); |
109 | 0 | } |
110 | | |
111 | | NS_IMETHODIMP |
112 | | nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal, |
113 | | const char *aParamName, |
114 | | const nsACString& aFallbackCharset, |
115 | | bool aTryLocaleCharset, |
116 | | char **aLang, nsAString& aResult) |
117 | 0 | { |
118 | 0 | return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING, |
119 | 0 | aFallbackCharset, aTryLocaleCharset, aLang, aResult); |
120 | 0 | } |
121 | | |
122 | | /* static */ |
123 | | nsresult |
124 | | nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal, |
125 | | const char *aParamName, |
126 | | nsAString& aResult) |
127 | 0 | { |
128 | 0 | return DoGetParameter(aHeaderVal, |
129 | 0 | aParamName, |
130 | 0 | HTTP_FIELD_ENCODING, |
131 | 0 | EmptyCString(), |
132 | 0 | false, |
133 | 0 | nullptr, |
134 | 0 | aResult); |
135 | 0 | } |
136 | | |
137 | | // XXX : aTryLocaleCharset is not yet effective. |
138 | | /* static */ |
139 | | nsresult |
140 | | nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal, |
141 | | const char *aParamName, |
142 | | ParamDecoding aDecoding, |
143 | | const nsACString& aFallbackCharset, |
144 | | bool aTryLocaleCharset, |
145 | | char **aLang, nsAString& aResult) |
146 | 0 | { |
147 | 0 | aResult.Truncate(); |
148 | 0 | nsresult rv; |
149 | 0 |
|
150 | 0 | // get parameter (decode RFC 2231/5987 when applicable, as specified by |
151 | 0 | // aDecoding (5987 being a subset of 2231) and return charset.) |
152 | 0 | nsCString med; |
153 | 0 | nsCString charset; |
154 | 0 | rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName, |
155 | 0 | aDecoding, getter_Copies(charset), aLang, |
156 | 0 | getter_Copies(med)); |
157 | 0 | if (NS_FAILED(rv)) |
158 | 0 | return rv; |
159 | 0 | |
160 | 0 | // convert to UTF-8 after charset conversion and RFC 2047 decoding |
161 | 0 | // if necessary. |
162 | 0 | |
163 | 0 | nsAutoCString str1; |
164 | 0 | rv = internalDecodeParameter(med, charset, EmptyCString(), false, |
165 | 0 | // was aDecoding == MIME_FIELD_ENCODING |
166 | 0 | // see bug 875615 |
167 | 0 | true, |
168 | 0 | str1); |
169 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
170 | 0 |
|
171 | 0 | if (!aFallbackCharset.IsEmpty()) |
172 | 0 | { |
173 | 0 | const Encoding* encoding = Encoding::ForLabel(aFallbackCharset); |
174 | 0 | nsAutoCString str2; |
175 | 0 | if (NS_SUCCEEDED(ConvertStringToUTF8(str1, aFallbackCharset, false, |
176 | 0 | encoding != UTF_8_ENCODING, |
177 | 0 | str2))) { |
178 | 0 | CopyUTF8toUTF16(str2, aResult); |
179 | 0 | return NS_OK; |
180 | 0 | } |
181 | 0 | } |
182 | 0 | |
183 | 0 | if (IsUTF8(str1)) { |
184 | 0 | CopyUTF8toUTF16(str1, aResult); |
185 | 0 | return NS_OK; |
186 | 0 | } |
187 | 0 | |
188 | 0 | if (aTryLocaleCharset && !NS_IsNativeUTF8()) |
189 | 0 | return NS_CopyNativeToUnicode(str1, aResult); |
190 | 0 | |
191 | 0 | CopyASCIItoUTF16(str1, aResult); |
192 | 0 | return NS_OK; |
193 | 0 | } |
194 | | |
// Undo backslash escaping inside a quoted-string, in place.
// Each backslash that is followed by another character is dropped and the
// following character is kept literally; a backslash that is the very last
// character is kept. The string can only get shorter and is always
// NUL-terminated on return.
void RemoveQuotedStringEscapes(char *src)
{
  const char *in = src;
  char *out = src;

  while (*in) {
    if (*in == '\\' && in[1] != '\0') {
      // step over the backslash; the escaped character is copied below
      ++in;
    }
    *out = *in;
    ++out;
    ++in;
  }

  *out = '\0';
}
212 | | |
// Returns true if aChar is an ASCII hexadecimal digit (0-9, a-f, A-F).
bool IsHexDigit(char aChar)
{
  return (aChar >= '0' && aChar <= '9') ||
         (aChar >= 'a' && aChar <= 'f') ||
         (aChar >= 'A' && aChar <= 'F');
}

// Validate that the first |len| bytes of aValue are syntactically valid with
// respect to %-escapes: every '%' must be followed by two hex digits, and
// both digits must lie inside the [0, len) range. An escape that is cut off
// by the end of the range is rejected, and no byte at or beyond aValue[len]
// is ever read.
// Returns true when the range is valid (including an empty range).
bool IsValidPercentEscaped(const char *aValue, int32_t len)
{
  for (int32_t i = 0; i < len; i++) {
    if (aValue[i] != '%') {
      continue;
    }
    // Need aValue[i + 1] and aValue[i + 2] to be inside the range.
    if (len - i < 3 ||
        !IsHexDigit(aValue[i + 1]) ||
        !IsHexDigit(aValue[i + 2])) {
      return false;
    }
  }
  return true;
}
235 | | |
236 | | // Support for continuations (RFC 2231, Section 3) |
237 | | |
238 | | // only a sane number supported |
239 | 0 | #define MAX_CONTINUATIONS 999 |
240 | | |
// One segment of an RFC 2231 continuation: a (pointer, length) view of the
// segment's raw value plus two flags recording which post-processing the
// segment needs once it has been copied out. The pointed-to bytes are not
// copied by this class.
class Continuation {
  public:
    Continuation(const char *aValue, uint32_t aLength,
                 bool aNeedsPercentDecoding, bool aWasQuotedString)
      : value(aValue)
      , length(aLength)
      , needsPercentDecoding(aNeedsPercentDecoding)
      , wasQuotedString(aWasQuotedString)
    {
    }

    // default constructor, needed for nsTArray; yields an empty segment
    Continuation()
      : value(nullptr)
      , length(0)
      , needsPercentDecoding(false)
      , wasQuotedString(false)
    {
    }

    ~Continuation() = default;

    const char *value;          // start of the raw segment value
    uint32_t length;            // number of bytes at |value|
    bool needsPercentDecoding;  // segment value is %-escaped
    bool wasQuotedString;       // segment value came from a quoted-string
};
266 | | |
267 | | // combine segments into a single string, returning the allocated string |
268 | | // (or nullptr) while emptying the list |
269 | | char *combineContinuations(nsTArray<Continuation>& aArray) |
270 | 0 | { |
271 | 0 | // Sanity check |
272 | 0 | if (aArray.Length() == 0) |
273 | 0 | return nullptr; |
274 | 0 | |
275 | 0 | // Get an upper bound for the length |
276 | 0 | uint32_t length = 0; |
277 | 0 | for (uint32_t i = 0; i < aArray.Length(); i++) { |
278 | 0 | length += aArray[i].length; |
279 | 0 | } |
280 | 0 |
|
281 | 0 | // Allocate |
282 | 0 | char *result = (char *) moz_xmalloc(length + 1); |
283 | 0 |
|
284 | 0 | // Concatenate |
285 | 0 | *result = '\0'; |
286 | 0 |
|
287 | 0 | for (uint32_t i = 0; i < aArray.Length(); i++) { |
288 | 0 | Continuation cont = aArray[i]; |
289 | 0 | if (! cont.value) break; |
290 | 0 | |
291 | 0 | char *c = result + strlen(result); |
292 | 0 | strncat(result, cont.value, cont.length); |
293 | 0 | if (cont.needsPercentDecoding) { |
294 | 0 | nsUnescape(c); |
295 | 0 | } |
296 | 0 | if (cont.wasQuotedString) { |
297 | 0 | RemoveQuotedStringEscapes(c); |
298 | 0 | } |
299 | 0 | } |
300 | 0 |
|
301 | 0 | // return null if empty value |
302 | 0 | if (*result == '\0') { |
303 | 0 | free(result); |
304 | 0 | result = nullptr; |
305 | 0 | } |
306 | 0 |
|
307 | 0 | return result; |
308 | 0 | } |
309 | | |
310 | | // add a continuation, return false on error if segment already has been seen |
311 | | bool addContinuation(nsTArray<Continuation>& aArray, uint32_t aIndex, |
312 | | const char *aValue, uint32_t aLength, |
313 | | bool aNeedsPercentDecoding, bool aWasQuotedString) |
314 | 0 | { |
315 | 0 | if (aIndex < aArray.Length() && aArray[aIndex].value) { |
316 | 0 | NS_WARNING("duplicate RC2231 continuation segment #\n"); |
317 | 0 | return false; |
318 | 0 | } |
319 | 0 |
|
320 | 0 | if (aIndex > MAX_CONTINUATIONS) { |
321 | 0 | NS_WARNING("RC2231 continuation segment # exceeds limit\n"); |
322 | 0 | return false; |
323 | 0 | } |
324 | 0 |
|
325 | 0 | if (aNeedsPercentDecoding && aWasQuotedString) { |
326 | 0 | NS_WARNING("RC2231 continuation segment can't use percent encoding and quoted string form at the same time\n"); |
327 | 0 | return false; |
328 | 0 | } |
329 | 0 |
|
330 | 0 | Continuation cont(aValue, aLength, aNeedsPercentDecoding, aWasQuotedString); |
331 | 0 |
|
332 | 0 | if (aArray.Length() <= aIndex) { |
333 | 0 | aArray.SetLength(aIndex + 1); |
334 | 0 | } |
335 | 0 | aArray[aIndex] = cont; |
336 | 0 |
|
337 | 0 | return true; |
338 | 0 | } |
339 | | |
340 | | // parse a segment number; return -1 on error |
341 | | int32_t parseSegmentNumber(const char *aValue, int32_t aLen) |
342 | 0 | { |
343 | 0 | if (aLen < 1) { |
344 | 0 | NS_WARNING("segment number missing\n"); |
345 | 0 | return -1; |
346 | 0 | } |
347 | 0 |
|
348 | 0 | if (aLen > 1 && aValue[0] == '0') { |
349 | 0 | NS_WARNING("leading '0' not allowed in segment number\n"); |
350 | 0 | return -1; |
351 | 0 | } |
352 | 0 |
|
353 | 0 | int32_t segmentNumber = 0; |
354 | 0 |
|
355 | 0 | for (int32_t i = 0; i < aLen; i++) { |
356 | 0 | if (! (aValue[i] >= '0' && aValue[i] <= '9')) { |
357 | 0 | NS_WARNING("invalid characters in segment number\n"); |
358 | 0 | return -1; |
359 | 0 | } |
360 | 0 |
|
361 | 0 | segmentNumber *= 10; |
362 | 0 | segmentNumber += aValue[i] - '0'; |
363 | 0 | if (segmentNumber > MAX_CONTINUATIONS) { |
364 | 0 | NS_WARNING("Segment number exceeds sane size\n"); |
365 | 0 | return -1; |
366 | 0 | } |
367 | 0 | } |
368 | 0 |
|
369 | 0 | return segmentNumber; |
370 | 0 | } |
371 | | |
372 | | // validate a given octet sequence for compliance with the specified |
373 | | // encoding |
374 | | bool IsValidOctetSequenceForCharset(const nsACString& aCharset, const char *aOctets) |
375 | 0 | { |
376 | 0 | nsAutoCString tmpRaw; |
377 | 0 | tmpRaw.Assign(aOctets); |
378 | 0 | nsAutoCString tmpDecoded; |
379 | 0 |
|
380 | 0 | nsresult rv = ConvertStringToUTF8(tmpRaw, aCharset, |
381 | 0 | false, false, tmpDecoded); |
382 | 0 |
|
383 | 0 | if (rv != NS_OK) { |
384 | 0 | // we can't decode; charset may be unsupported, or the octet sequence |
385 | 0 | // is broken (illegal or incomplete octet sequence contained) |
386 | 0 | NS_WARNING("RFC2231/5987 parameter value does not decode according to specified charset\n"); |
387 | 0 | return false; |
388 | 0 | } |
389 | 0 |
|
390 | 0 | return true; |
391 | 0 | } |
392 | | |
393 | | // moved almost verbatim from mimehdrs.cpp |
394 | | // char * |
395 | | // MimeHeaders_get_parameter (const char *header_value, const char *parm_name, |
396 | | // char **charset, char **language) |
397 | | // |
398 | | // The format of these header lines is |
399 | | // <token> [ ';' <token> '=' <token-or-quoted-string> ]* |
400 | | NS_IMETHODIMP |
401 | | nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, |
402 | | const char *aParamName, |
403 | | char **aCharset, |
404 | | char **aLang, |
405 | | char **aResult) |
406 | 0 | { |
407 | 0 | return DoParameterInternal(aHeaderValue, aParamName, MIME_FIELD_ENCODING, |
408 | 0 | aCharset, aLang, aResult); |
409 | 0 | } |
410 | | |
411 | | |
412 | | /* static */ |
413 | | nsresult |
414 | | nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue, |
415 | | const char *aParamName, |
416 | | ParamDecoding aDecoding, |
417 | | char **aCharset, |
418 | | char **aLang, |
419 | | char **aResult) |
420 | 0 | { |
421 | 0 |
|
422 | 0 | if (!aHeaderValue || !*aHeaderValue || !aResult) |
423 | 0 | return NS_ERROR_INVALID_ARG; |
424 | 0 | |
425 | 0 | *aResult = nullptr; |
426 | 0 |
|
427 | 0 | if (aCharset) *aCharset = nullptr; |
428 | 0 | if (aLang) *aLang = nullptr; |
429 | 0 |
|
430 | 0 | nsAutoCString charset; |
431 | 0 |
|
432 | 0 | // change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable |
433 | 0 | // them for HTTP header fields later on, see bug 776324 |
434 | 0 | bool acceptContinuations = true; |
435 | 0 |
|
436 | 0 | const char *str = aHeaderValue; |
437 | 0 |
|
438 | 0 | // skip leading white space. |
439 | 0 | for (; *str && nsCRT::IsAsciiSpace(*str); ++str) |
440 | 0 | ; |
441 | 0 | const char *start = str; |
442 | 0 |
|
443 | 0 | // aParamName is empty. return the first (possibly) _unnamed_ 'parameter' |
444 | 0 | // For instance, return 'inline' in the following case: |
445 | 0 | // Content-Disposition: inline; filename=..... |
446 | 0 | if (!aParamName || !*aParamName) |
447 | 0 | { |
448 | 0 | for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str) |
449 | 0 | ; |
450 | 0 | if (str == start) |
451 | 0 | return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY; |
452 | 0 | |
453 | 0 | *aResult = (char*) moz_xmemdup(start, (str - start) + 1); |
454 | 0 | (*aResult)[str - start] = '\0'; // null-terminate |
455 | 0 | return NS_OK; |
456 | 0 | } |
457 | 0 | |
458 | 0 | /* Skip forward to first ';' */ |
459 | 0 | for (; *str && *str != ';' && *str != ','; ++str) |
460 | 0 | ; |
461 | 0 | if (*str) |
462 | 0 | str++; |
463 | 0 | /* Skip over following whitespace */ |
464 | 0 | for (; *str && nsCRT::IsAsciiSpace(*str); ++str) |
465 | 0 | ; |
466 | 0 |
|
467 | 0 | // Some broken http servers just specify parameters |
468 | 0 | // like 'filename' without specifying disposition |
469 | 0 | // method. Rewind to the first non-white-space |
470 | 0 | // character. |
471 | 0 |
|
472 | 0 | if (!*str) |
473 | 0 | str = start; |
474 | 0 |
|
475 | 0 | // RFC2231 - The legitimate parm format can be: |
476 | 0 | // A. title=ThisIsTitle |
477 | 0 | // B. title*=us-ascii'en-us'This%20is%20wierd. |
478 | 0 | // C. title*0*=us-ascii'en'This%20is%20wierd.%20We |
479 | 0 | // title*1*=have%20to%20support%20this. |
480 | 0 | // title*2="Else..." |
481 | 0 | // D. title*0="Hey, what you think you are doing?" |
482 | 0 | // title*1="There is no charset and lang info." |
483 | 0 | // RFC5987: only A and B |
484 | 0 |
|
485 | 0 | // collect results for the different algorithms (plain filename, |
486 | 0 | // RFC5987/2231-encoded filename, + continuations) separately and decide |
487 | 0 | // which to use at the end |
488 | 0 | char *caseAResult = nullptr; |
489 | 0 | char *caseBResult = nullptr; |
490 | 0 | char *caseCDResult = nullptr; |
491 | 0 |
|
492 | 0 | // collect continuation segments |
493 | 0 | nsTArray<Continuation> segments; |
494 | 0 |
|
495 | 0 |
|
496 | 0 | // our copies of the charset parameter, kept separately as they might |
497 | 0 | // differ for the two formats |
498 | 0 | nsDependentCSubstring charsetB, charsetCD; |
499 | 0 |
|
500 | 0 | nsDependentCSubstring lang; |
501 | 0 |
|
502 | 0 | int32_t paramLen = strlen(aParamName); |
503 | 0 |
|
504 | 0 | while (*str) { |
505 | 0 | // find name/value |
506 | 0 |
|
507 | 0 | const char *nameStart = str; |
508 | 0 | const char *nameEnd = nullptr; |
509 | 0 | const char *valueStart = nullptr; |
510 | 0 | const char *valueEnd = nullptr; |
511 | 0 | bool isQuotedString = false; |
512 | 0 |
|
513 | 0 | NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace."); |
514 | 0 |
|
515 | 0 | // Skip forward to the end of this token. |
516 | 0 | for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';'; str++) |
517 | 0 | ; |
518 | 0 | nameEnd = str; |
519 | 0 |
|
520 | 0 | int32_t nameLen = nameEnd - nameStart; |
521 | 0 |
|
522 | 0 | // Skip over whitespace, '=', and whitespace |
523 | 0 | while (nsCRT::IsAsciiSpace(*str)) ++str; |
524 | 0 | if (!*str) { |
525 | 0 | break; |
526 | 0 | } |
527 | 0 | if (*str != '=') { |
528 | 0 | // don't accept parameters without "=" |
529 | 0 | goto increment_str; |
530 | 0 | } |
531 | 0 | // Skip over '=' only if it was actually there |
532 | 0 | str++; |
533 | 0 | while (nsCRT::IsAsciiSpace(*str)) ++str; |
534 | 0 |
|
535 | 0 | if (*str != '"') { |
536 | 0 | // The value is a token, not a quoted string. |
537 | 0 | valueStart = str; |
538 | 0 | for (valueEnd = str; |
539 | 0 | *valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';'; |
540 | 0 | valueEnd++) |
541 | 0 | ; |
542 | 0 | str = valueEnd; |
543 | 0 | } else { |
544 | 0 | isQuotedString = true; |
545 | 0 |
|
546 | 0 | ++str; |
547 | 0 | valueStart = str; |
548 | 0 | for (valueEnd = str; *valueEnd; ++valueEnd) { |
549 | 0 | if (*valueEnd == '\\' && *(valueEnd + 1)) |
550 | 0 | ++valueEnd; |
551 | 0 | else if (*valueEnd == '"') |
552 | 0 | break; |
553 | 0 | } |
554 | 0 | str = valueEnd; |
555 | 0 | // *valueEnd != null means that *valueEnd is quote character. |
556 | 0 | if (*valueEnd) |
557 | 0 | str++; |
558 | 0 | } |
559 | 0 |
|
560 | 0 | // See if this is the simplest case (case A above), |
561 | 0 | // a 'single' line value with no charset and lang. |
562 | 0 | // If so, copy it and return. |
563 | 0 | if (nameLen == paramLen && |
564 | 0 | !nsCRT::strncasecmp(nameStart, aParamName, paramLen)) { |
565 | 0 |
|
566 | 0 | if (caseAResult) { |
567 | 0 | // we already have one caseA result, ignore subsequent ones |
568 | 0 | goto increment_str; |
569 | 0 | } |
570 | 0 | |
571 | 0 | // if the parameter spans across multiple lines we have to strip out the |
572 | 0 | // line continuation -- jht 4/29/98 |
573 | 0 | nsAutoCString tempStr(valueStart, valueEnd - valueStart); |
574 | 0 | tempStr.StripCRLF(); |
575 | 0 | char *res = ToNewCString(tempStr); |
576 | 0 | NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY); |
577 | 0 |
|
578 | 0 | if (isQuotedString) |
579 | 0 | RemoveQuotedStringEscapes(res); |
580 | 0 |
|
581 | 0 | caseAResult = res; |
582 | 0 | // keep going, we may find a RFC 2231/5987 encoded alternative |
583 | 0 | } |
584 | 0 | // case B, C, and D |
585 | 0 | else if (nameLen > paramLen && |
586 | 0 | !nsCRT::strncasecmp(nameStart, aParamName, paramLen) && |
587 | 0 | *(nameStart + paramLen) == '*') { |
588 | 0 |
|
589 | 0 | // 1st char past '*' |
590 | 0 | const char *cp = nameStart + paramLen + 1; |
591 | 0 |
|
592 | 0 | // if param name ends in "*" we need do to RFC5987 "ext-value" decoding |
593 | 0 | bool needExtDecoding = *(nameEnd - 1) == '*'; |
594 | 0 |
|
595 | 0 | bool caseB = nameLen == paramLen + 1; |
596 | 0 | bool caseCStart = (*cp == '0') && needExtDecoding; |
597 | 0 |
|
598 | 0 | // parse the segment number |
599 | 0 | int32_t segmentNumber = -1; |
600 | 0 | if (!caseB) { |
601 | 0 | int32_t segLen = (nameEnd - cp) - (needExtDecoding ? 1 : 0); |
602 | 0 | segmentNumber = parseSegmentNumber(cp, segLen); |
603 | 0 |
|
604 | 0 | if (segmentNumber == -1) { |
605 | 0 | acceptContinuations = false; |
606 | 0 | goto increment_str; |
607 | 0 | } |
608 | 0 | } |
609 | 0 | |
610 | 0 | // CaseB and start of CaseC: requires charset and optional language |
611 | 0 | // in quotes (quotes required even if lang is blank) |
612 | 0 | if (caseB || (caseCStart && acceptContinuations)) { |
613 | 0 | // look for single quotation mark(') |
614 | 0 | const char *sQuote1 = PL_strchr(valueStart, 0x27); |
615 | 0 | const char *sQuote2 = sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nullptr; |
616 | 0 |
|
617 | 0 | // Two single quotation marks must be present even in |
618 | 0 | // absence of charset and lang. |
619 | 0 | if (!sQuote1 || !sQuote2) { |
620 | 0 | NS_WARNING("Mandatory two single quotes are missing in header parameter\n"); |
621 | 0 | } |
622 | 0 |
|
623 | 0 | const char *charsetStart = nullptr; |
624 | 0 | int32_t charsetLength = 0; |
625 | 0 | const char *langStart = nullptr; |
626 | 0 | int32_t langLength = 0; |
627 | 0 | const char *rawValStart = nullptr; |
628 | 0 | int32_t rawValLength = 0; |
629 | 0 |
|
630 | 0 | if (sQuote2 && sQuote1) { |
631 | 0 | // both delimiters present: charSet'lang'rawVal |
632 | 0 | rawValStart = sQuote2 + 1; |
633 | 0 | rawValLength = valueEnd - rawValStart; |
634 | 0 |
|
635 | 0 | langStart = sQuote1 + 1; |
636 | 0 | langLength = sQuote2 - langStart; |
637 | 0 |
|
638 | 0 | charsetStart = valueStart; |
639 | 0 | charsetLength = sQuote1 - charsetStart; |
640 | 0 | } |
641 | 0 | else if (sQuote1) { |
642 | 0 | // one delimiter; assume charset'rawVal |
643 | 0 | rawValStart = sQuote1 + 1; |
644 | 0 | rawValLength = valueEnd - rawValStart; |
645 | 0 |
|
646 | 0 | charsetStart = valueStart; |
647 | 0 | charsetLength = sQuote1 - valueStart; |
648 | 0 | } |
649 | 0 | else { |
650 | 0 | // no delimiter: just rawVal |
651 | 0 | rawValStart = valueStart; |
652 | 0 | rawValLength = valueEnd - valueStart; |
653 | 0 | } |
654 | 0 |
|
655 | 0 | if (langLength != 0) { |
656 | 0 | lang.Assign(langStart, langLength); |
657 | 0 | } |
658 | 0 |
|
659 | 0 | // keep the charset for later |
660 | 0 | if (caseB) { |
661 | 0 | charsetB.Assign(charsetStart, charsetLength); |
662 | 0 | } else { |
663 | 0 | // if caseCorD |
664 | 0 | charsetCD.Assign(charsetStart, charsetLength); |
665 | 0 | } |
666 | 0 |
|
667 | 0 | // non-empty value part |
668 | 0 | if (rawValLength > 0) { |
669 | 0 | if (!caseBResult && caseB) { |
670 | 0 | if (!IsValidPercentEscaped(rawValStart, rawValLength)) { |
671 | 0 | goto increment_str; |
672 | 0 | } |
673 | 0 | |
674 | 0 | // allocate buffer for the raw value |
675 | 0 | char* tmpResult = |
676 | 0 | (char*) moz_xmemdup(rawValStart, rawValLength + 1); |
677 | 0 | *(tmpResult + rawValLength) = 0; |
678 | 0 |
|
679 | 0 | nsUnescape(tmpResult); |
680 | 0 | caseBResult = tmpResult; |
681 | 0 | } else { |
682 | 0 | // caseC |
683 | 0 | bool added = addContinuation(segments, 0, rawValStart, |
684 | 0 | rawValLength, needExtDecoding, |
685 | 0 | isQuotedString); |
686 | 0 |
|
687 | 0 | if (!added) { |
688 | 0 | // continuation not added, stop processing them |
689 | 0 | acceptContinuations = false; |
690 | 0 | } |
691 | 0 | } |
692 | 0 | } |
693 | 0 | } // end of if-block : title*0*= or title*= |
694 | 0 | // caseD: a line of multiline param with no need for unescaping : title*[0-9]= |
695 | 0 | // or 2nd or later lines of a caseC param : title*[1-9]*= |
696 | 0 | else if (acceptContinuations && segmentNumber != -1) { |
697 | 0 | uint32_t valueLength = valueEnd - valueStart; |
698 | 0 |
|
699 | 0 | bool added = addContinuation(segments, segmentNumber, valueStart, |
700 | 0 | valueLength, needExtDecoding, |
701 | 0 | isQuotedString); |
702 | 0 |
|
703 | 0 | if (!added) { |
704 | 0 | // continuation not added, stop processing them |
705 | 0 | acceptContinuations = false; |
706 | 0 | } |
707 | 0 | } // end of if-block : title*[0-9]= or title*[1-9]*= |
708 | 0 | } |
709 | 0 |
|
710 | 0 | // str now points after the end of the value. |
711 | 0 | // skip over whitespace, ';', whitespace. |
712 | 0 | increment_str: |
713 | 0 | while (nsCRT::IsAsciiSpace(*str)) ++str; |
714 | 0 | if (*str == ';') { |
715 | 0 | ++str; |
716 | 0 | } else { |
717 | 0 | // stop processing the header field; either we are done or the |
718 | 0 | // separator was missing |
719 | 0 | break; |
720 | 0 | } |
721 | 0 | while (nsCRT::IsAsciiSpace(*str)) ++str; |
722 | 0 | } |
723 | 0 |
|
724 | 0 | caseCDResult = combineContinuations(segments); |
725 | 0 |
|
726 | 0 | if (caseBResult && !charsetB.IsEmpty()) { |
727 | 0 | // check that the 2231/5987 result decodes properly given the |
728 | 0 | // specified character set |
729 | 0 | if (!IsValidOctetSequenceForCharset(charsetB, caseBResult)) |
730 | 0 | caseBResult = nullptr; |
731 | 0 | } |
732 | 0 |
|
733 | 0 | if (caseCDResult && !charsetCD.IsEmpty()) { |
734 | 0 | // check that the 2231/5987 result decodes properly given the |
735 | 0 | // specified character set |
736 | 0 | if (!IsValidOctetSequenceForCharset(charsetCD, caseCDResult)) |
737 | 0 | caseCDResult = nullptr; |
738 | 0 | } |
739 | 0 |
|
740 | 0 | if (caseBResult) { |
741 | 0 | // prefer simple 5987 format over 2231 with continuations |
742 | 0 | *aResult = caseBResult; |
743 | 0 | caseBResult = nullptr; |
744 | 0 | charset.Assign(charsetB); |
745 | 0 | } |
746 | 0 | else if (caseCDResult) { |
747 | 0 | // prefer 2231/5987 with or without continuations over plain format |
748 | 0 | *aResult = caseCDResult; |
749 | 0 | caseCDResult = nullptr; |
750 | 0 | charset.Assign(charsetCD); |
751 | 0 | } |
752 | 0 | else if (caseAResult) { |
753 | 0 | *aResult = caseAResult; |
754 | 0 | caseAResult = nullptr; |
755 | 0 | } |
756 | 0 |
|
757 | 0 | // free unused stuff |
758 | 0 | free(caseAResult); |
759 | 0 | free(caseBResult); |
760 | 0 | free(caseCDResult); |
761 | 0 |
|
762 | 0 | // if we have a result |
763 | 0 | if (*aResult) { |
764 | 0 | // then return charset and lang as well |
765 | 0 | if (aLang && !lang.IsEmpty()) { |
766 | 0 | uint32_t len = lang.Length(); |
767 | 0 | *aLang = (char*) moz_xmemdup(lang.BeginReading(), len + 1); |
768 | 0 | *(*aLang + len) = 0; |
769 | 0 | } |
770 | 0 | if (aCharset && !charset.IsEmpty()) { |
771 | 0 | uint32_t len = charset.Length(); |
772 | 0 | *aCharset = (char*) moz_xmemdup(charset.BeginReading(), len + 1); |
773 | 0 | *(*aCharset + len) = 0; |
774 | 0 | } |
775 | 0 | } |
776 | 0 |
|
777 | 0 | return *aResult ? NS_OK : NS_ERROR_INVALID_ARG; |
778 | 0 | } |
779 | | |
780 | | nsresult |
781 | | internalDecodeRFC2047Header(const char* aHeaderVal, const nsACString& aDefaultCharset, |
782 | | bool aOverrideCharset, bool aEatContinuations, |
783 | | nsACString& aResult) |
784 | 0 | { |
785 | 0 | aResult.Truncate(); |
786 | 0 | if (!aHeaderVal) |
787 | 0 | return NS_ERROR_INVALID_ARG; |
788 | 0 | if (!*aHeaderVal) |
789 | 0 | return NS_OK; |
790 | 0 | |
791 | 0 | |
792 | 0 | // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string but |
793 | 0 | // aDefaultCharset is specified, decodes RFC 2047 encoding and converts |
794 | 0 | // to UTF-8. Otherwise, just strips away CRLF. |
795 | 0 | if (PL_strstr(aHeaderVal, "=?") || |
796 | 0 | (!aDefaultCharset.IsEmpty() && (!IsUTF8(nsDependentCString(aHeaderVal)) || |
797 | 0 | Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal))))) { |
798 | 0 | DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult); |
799 | 0 | } else if (aEatContinuations && |
800 | 0 | (PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) { |
801 | 0 | aResult = aHeaderVal; |
802 | 0 | } else { |
803 | 0 | aEatContinuations = false; |
804 | 0 | aResult = aHeaderVal; |
805 | 0 | } |
806 | 0 |
|
807 | 0 | if (aEatContinuations) { |
808 | 0 | nsAutoCString temp(aResult); |
809 | 0 | temp.ReplaceSubstring("\n\t", " "); |
810 | 0 | temp.ReplaceSubstring("\r\t", " "); |
811 | 0 | temp.StripCRLF(); |
812 | 0 | aResult = temp; |
813 | 0 | } |
814 | 0 |
|
815 | 0 | return NS_OK; |
816 | 0 | } |
817 | | |
818 | | NS_IMETHODIMP |
819 | | nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal, |
820 | | const char* aDefaultCharset, |
821 | | bool aOverrideCharset, |
822 | | bool aEatContinuations, |
823 | | nsACString& aResult) |
824 | 0 | { |
825 | 0 | return internalDecodeRFC2047Header(aHeaderVal, |
826 | 0 | nsCString(aDefaultCharset), |
827 | 0 | aOverrideCharset, aEatContinuations, |
828 | 0 | aResult); |
829 | 0 | } |
830 | | |
// Returns true if aChar may appear unescaped in an RFC 5987 value
// (RFC 5987, Section 3.2.1, "attr-char"): ASCII letters, digits, and the
// punctuation characters ! # $ & + - . ^ _ ` | ~
bool IsRFC5987AttrChar(char aChar)
{
  const bool isLetter = (aChar >= 'a' && aChar <= 'z') ||
                        (aChar >= 'A' && aChar <= 'Z');
  const bool isDigit = aChar >= '0' && aChar <= '9';
  if (isLetter || isDigit) {
    return true;
  }

  switch (aChar) {
    case '!':
    case '#':
    case '$':
    case '&':
    case '+':
    case '-':
    case '.':
    case '^':
    case '_':
    case '`':
    case '|':
    case '~':
      return true;
    default:
      return false;
  }
}
844 | | |
845 | | // percent-decode a value |
846 | | // returns false on failure |
847 | | bool PercentDecode(nsACString& aValue) |
848 | 0 | { |
849 | 0 | char *c = (char *) moz_xmalloc(aValue.Length() + 1); |
850 | 0 |
|
851 | 0 | strcpy(c, PromiseFlatCString(aValue).get()); |
852 | 0 | nsUnescape(c); |
853 | 0 | aValue.Assign(c); |
854 | 0 | free(c); |
855 | 0 |
|
856 | 0 | return true; |
857 | 0 | } |
858 | | |
859 | | // Decode a parameter value using the encoding defined in RFC 5987 |
860 | | // |
861 | | // charset "'" [ language ] "'" value-chars |
862 | | NS_IMETHODIMP |
863 | | nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString& aParamVal, |
864 | | nsACString& aLang, |
865 | | nsAString& aResult) |
866 | 0 | { |
867 | 0 | nsAutoCString charset; |
868 | 0 | nsAutoCString language; |
869 | 0 | nsAutoCString value; |
870 | 0 |
|
871 | 0 | uint32_t delimiters = 0; |
872 | 0 | const nsCString& encoded = PromiseFlatCString(aParamVal); |
873 | 0 | const char *c = encoded.get(); |
874 | 0 |
|
875 | 0 | while (*c) { |
876 | 0 | char tc = *c++; |
877 | 0 |
|
878 | 0 | if (tc == '\'') { |
879 | 0 | // single quote |
880 | 0 | delimiters++; |
881 | 0 | } else if (((unsigned char)tc) >= 128) { |
882 | 0 | // fail early, not ASCII |
883 | 0 | NS_WARNING("non-US-ASCII character in RFC5987-encoded param"); |
884 | 0 | return NS_ERROR_INVALID_ARG; |
885 | 0 | } else { |
886 | 0 | if (delimiters == 0) { |
887 | 0 | // valid characters are checked later implicitly |
888 | 0 | charset.Append(tc); |
889 | 0 | } else if (delimiters == 1) { |
890 | 0 | // no value checking for now |
891 | 0 | language.Append(tc); |
892 | 0 | } else if (delimiters == 2) { |
893 | 0 | if (IsRFC5987AttrChar(tc)) { |
894 | 0 | value.Append(tc); |
895 | 0 | } else if (tc == '%') { |
896 | 0 | if (!IsHexDigit(c[0]) || !IsHexDigit(c[1])) { |
897 | 0 | // we expect two more characters |
898 | 0 | NS_WARNING("broken %-escape in RFC5987-encoded param"); |
899 | 0 | return NS_ERROR_INVALID_ARG; |
900 | 0 | } |
901 | 0 | value.Append(tc); |
902 | 0 | // we consume two more |
903 | 0 | value.Append(*c++); |
904 | 0 | value.Append(*c++); |
905 | 0 | } else { |
906 | 0 | // character not allowed here |
907 | 0 | NS_WARNING("invalid character in RFC5987-encoded param"); |
908 | 0 | return NS_ERROR_INVALID_ARG; |
909 | 0 | } |
910 | 0 | } |
911 | 0 | } |
912 | 0 | } |
913 | 0 |
|
914 | 0 | if (delimiters != 2) { |
915 | 0 | NS_WARNING("missing delimiters in RFC5987-encoded param"); |
916 | 0 | return NS_ERROR_INVALID_ARG; |
917 | 0 | } |
918 | 0 |
|
919 | 0 | // abort early for unsupported encodings |
920 | 0 | if (!charset.LowerCaseEqualsLiteral("utf-8")) { |
921 | 0 | NS_WARNING("unsupported charset in RFC5987-encoded param"); |
922 | 0 | return NS_ERROR_INVALID_ARG; |
923 | 0 | } |
924 | 0 |
|
925 | 0 | // percent-decode |
926 | 0 | if (!PercentDecode(value)) { |
927 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
928 | 0 | } |
929 | 0 | |
930 | 0 | // return the encoding |
931 | 0 | aLang.Assign(language); |
932 | 0 |
|
933 | 0 | // finally convert octet sequence to UTF-8 and be done |
934 | 0 | nsAutoCString utf8; |
935 | 0 | nsresult rv = ConvertStringToUTF8(value, charset, true, false, utf8); |
936 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
937 | 0 |
|
938 | 0 | CopyUTF8toUTF16(utf8, aResult); |
939 | 0 | return NS_OK; |
940 | 0 | } |
941 | | |
942 | | nsresult |
943 | | internalDecodeParameter(const nsACString& aParamValue, const nsACString& aCharset, |
944 | | const nsACString& aDefaultCharset, bool aOverrideCharset, |
945 | | bool aDecode2047, nsACString& aResult) |
946 | 0 | { |
947 | 0 | aResult.Truncate(); |
948 | 0 | // If aCharset is given, aParamValue was obtained from RFC2231/5987 |
949 | 0 | // encoding and we're pretty sure that it's in aCharset. |
950 | 0 | if (!aCharset.IsEmpty()) |
951 | 0 | { |
952 | 0 | return ConvertStringToUTF8(aParamValue, aCharset, true, true, aResult); |
953 | 0 | } |
954 | 0 | |
955 | 0 | const nsCString& param = PromiseFlatCString(aParamValue); |
956 | 0 | nsAutoCString unQuoted; |
957 | 0 | nsACString::const_iterator s, e; |
958 | 0 | param.BeginReading(s); |
959 | 0 | param.EndReading(e); |
960 | 0 |
|
961 | 0 | // strip '\' when used to quote CR, LF, '"' and '\' |
962 | 0 | for ( ; s != e; ++s) { |
963 | 0 | if ((*s == '\\')) { |
964 | 0 | if (++s == e) { |
965 | 0 | --s; // '\' is at the end. move back and append '\'. |
966 | 0 | } |
967 | 0 | else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' && *s != '\\') { |
968 | 0 | --s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\' |
969 | 0 | } |
970 | 0 | // else : skip '\' and append the quoted character. |
971 | 0 | } |
972 | 0 | unQuoted.Append(*s); |
973 | 0 | } |
974 | 0 |
|
975 | 0 | aResult = unQuoted; |
976 | 0 | nsresult rv = NS_OK; |
977 | 0 |
|
978 | 0 | if (aDecode2047) { |
979 | 0 | nsAutoCString decoded; |
980 | 0 |
|
981 | 0 | // Try RFC 2047 encoding, instead. |
982 | 0 | rv = internalDecodeRFC2047Header(unQuoted.get(), aDefaultCharset, |
983 | 0 | aOverrideCharset, true, decoded); |
984 | 0 |
|
985 | 0 | if (NS_SUCCEEDED(rv) && !decoded.IsEmpty()) |
986 | 0 | aResult = decoded; |
987 | 0 | } |
988 | 0 |
|
989 | 0 | return rv; |
990 | 0 | } |
991 | | |
992 | | NS_IMETHODIMP |
993 | | nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue, |
994 | | const char* aCharset, |
995 | | const char* aDefaultCharset, |
996 | | bool aOverrideCharset, |
997 | | nsACString& aResult) |
998 | 0 | { |
999 | 0 | return internalDecodeParameter(aParamValue, nsCString(aCharset), |
1000 | 0 | nsCString(aDefaultCharset), |
1001 | 0 | aOverrideCharset, true, aResult); |
1002 | 0 | } |
1003 | | |
// True when c is an ASCII hex digit: '0'-'9', 'A'-'F' or 'a'-'f'.
#define ISHEXCHAR(c) \
        ((0x30 <= uint8_t(c) && uint8_t(c) <= 0x39)  ||  \
         (0x41 <= uint8_t(c) && uint8_t(c) <= 0x46)  ||  \
         (0x61 <= uint8_t(c) && uint8_t(c) <= 0x66))

// Decode Q encoding (RFC 2047).
//
// Reads exactly |length| bytes from |in| and returns a newly calloc'ed,
// NUL-terminated buffer that the caller must release with free():
//   - "=hh" (h being [0-9a-fA-F]) becomes the byte with that hex value,
//   - '_' becomes a space,
//   - any other 7-bit byte is copied unchanged.
// Afterwards every TAB up to the first NUL of the output is turned into a
// space.
//
// Returns nullptr if the allocation fails, if an '=' is not followed by two
// hex digits inside the remaining input, or if a byte with the high bit set
// is encountered.
// static
char *DecodeQ(const char *in, uint32_t length)
{
  // Numeric value of a hex digit that has already passed ISHEXCHAR.
  auto hexValue = [](char aDigit) -> unsigned {
    const unsigned d = uint8_t(aDigit);
    if (d <= 0x39) {
      return d - 0x30;               // '0'..'9'
    }
    return (d | 0x20) - 0x61 + 10;   // fold case, then 'a'..'f' -> 10..15
  };

  // The output can never be longer than the input; widen before adding the
  // terminator slot so the size computation cannot wrap in 32 bits.
  char *dest = static_cast<char*>(calloc(size_t(length) + 1, sizeof(char)));
  if (dest == nullptr) {
    return nullptr;
  }

  char *out = dest;
  while (length > 0) {
    const char ch = *in;

    if (ch == '=') {
      // check if |in| in the form of '=hh' where h is [0-9a-fA-F].
      if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2])) {
        free(dest);
        return nullptr;
      }
      // Both digits are known to be valid, so combine them directly
      // instead of re-parsing them with a scanf-style call.
      *out++ = static_cast<char>((hexValue(in[1]) << 4) | hexValue(in[2]));
      in += 3;
      length -= 3;
    } else if (ch == '_') {
      *out++ = ' ';
      in++;
      length--;
    } else {
      // Q-encoded text has to be 7-bit clean.
      if (ch & 0x80) {
        free(dest);
        return nullptr;
      }
      *out++ = ch;
      in++;
      length--;
    }
  }
  *out = '\0';

  // Normalise TABs to spaces. This walk stops at the first NUL, so bytes
  // following a decoded "=00" are left as they are.
  for (out = dest; *out; ++out) {
    if (*out == '\t') {
      *out = ' ';
    }
  }

  return dest;
}
1056 | | |
// check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842))
// or has ESC which may be an indication that it's in one of many ISO
// 2022 7bit encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554).
//
// Scans at most |len| bytes of |input| and returns
//   - false as soon as a byte with the high bit set is found,
//   - true as soon as an ESC (0x1B) is found,
//   - otherwise true only if the scan ended having seen a complete HZ
//     sequence ("~{" ... "~}", or "~~") and nothing that is illegal HZ.
//
// The byte following a '~' is only inspected when it lies inside the
// |len|-byte range; a '~' that is the last byte in range is treated like a
// '~' followed by a non-HZ character. This keeps the function from reading
// past the buffer and from wrapping the unsigned length counter.
// static
bool Is7bitNonAsciiString(const char *input, uint32_t len)
{
  enum { hz_initial,    // No HZ seen yet
         hz_escaped,    // Inside an HZ ~{ escape sequence
         hz_seen,       // Have seen at least one complete HZ sequence
         hz_notpresent  // Have seen something that is not legal HZ
  } hz_state = hz_initial;

  while (len) {
    const int32_t c = uint8_t(*input++);
    len--;

    if (c & 0x80) return false;
    if (c == 0x1B) return true;
    if (c != '~') continue;

    // |input| now points at the byte after the '~'; it may only be looked
    // at while there is input left.
    const bool hasNext = len > 0;

    switch (hz_state) {
      case hz_initial:
      case hz_seen:
        if (hasNext && *input == '{') {
          hz_state = hz_escaped;
        } else if (hasNext && *input == '~') {
          // ~~ is the HZ encoding of ~. Skip over second ~ as well
          hz_state = hz_seen;
          input++;
          len--;
        } else {
          hz_state = hz_notpresent;
        }
        break;

      case hz_escaped:
        if (hasNext && *input == '}') hz_state = hz_seen;
        break;

      default:
        // hz_notpresent is final; nothing can change it any more.
        break;
    }
  }
  return hz_state == hz_seen;
}
1103 | | |
1104 | 0 | #define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD) |
1105 | | |
1106 | | // copy 'raw' sequences of octets in aInput to aOutput. |
1107 | | // If aDefaultCharset is specified, the input is assumed to be in the |
1108 | | // charset and converted to UTF-8. Otherwise, a blind copy is made. |
1109 | | // If aDefaultCharset is specified, but the conversion to UTF-8 |
1110 | | // is not successful, each octet is replaced by Unicode replacement |
1111 | | // chars. *aOutput is advanced by the number of output octets. |
1112 | | // static |
1113 | | void CopyRawHeader(const char *aInput, uint32_t aLen, |
1114 | | const nsACString& aDefaultCharset, nsACString &aOutput) |
1115 | 0 | { |
1116 | 0 | int32_t c; |
1117 | 0 |
|
1118 | 0 | // If aDefaultCharset is not specified, make a blind copy. |
1119 | 0 | if (aDefaultCharset.IsEmpty()) { |
1120 | 0 | aOutput.Append(aInput, aLen); |
1121 | 0 | return; |
1122 | 0 | } |
1123 | 0 | |
1124 | 0 | // Copy as long as it's US-ASCII. An ESC may indicate ISO 2022 |
1125 | 0 | // A ~ may indicate it is HZ |
1126 | 0 | while (aLen && (c = uint8_t(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) { |
1127 | 0 | aOutput.Append(char(c)); |
1128 | 0 | aLen--; |
1129 | 0 | } |
1130 | 0 | if (!aLen) { |
1131 | 0 | return; |
1132 | 0 | } |
1133 | 0 | aInput--; |
1134 | 0 |
|
1135 | 0 | // skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii |
1136 | 0 | // string and aDefaultCharset is a 7bit non-ascii charset. |
1137 | 0 | bool skipCheck = (c == 0x1B || c == '~') && |
1138 | 0 | IS_7BIT_NON_ASCII_CHARSET(PromiseFlatCString(aDefaultCharset).get()); |
1139 | 0 |
|
1140 | 0 | // If not UTF-8, treat as default charset |
1141 | 0 | nsAutoCString utf8Text; |
1142 | 0 | if (NS_SUCCEEDED( |
1143 | 0 | ConvertStringToUTF8(Substring(aInput, aInput + aLen), |
1144 | 0 | PromiseFlatCString(aDefaultCharset), |
1145 | 0 | skipCheck, true, utf8Text))) { |
1146 | 0 | aOutput.Append(utf8Text); |
1147 | 0 | } else { // replace each octet with Unicode replacement char in UTF-8. |
1148 | 0 | for (uint32_t i = 0; i < aLen; i++) { |
1149 | 0 | c = uint8_t(*aInput++); |
1150 | 0 | if (c & 0x80) |
1151 | 0 | aOutput.Append(REPLACEMENT_CHAR); |
1152 | 0 | else |
1153 | 0 | aOutput.Append(char(c)); |
1154 | 0 | } |
1155 | 0 | } |
1156 | 0 | } |
1157 | | |
1158 | | nsresult DecodeQOrBase64Str(const char *aEncoded, size_t aLen, char aQOrBase64, |
1159 | | const nsACString& aCharset, nsACString &aResult) |
1160 | 0 | { |
1161 | 0 | char *decodedText; |
1162 | 0 | NS_ASSERTION(aQOrBase64 == 'Q' || aQOrBase64 == 'B', "Should be 'Q' or 'B'"); |
1163 | 0 | if(aQOrBase64 == 'Q') |
1164 | 0 | decodedText = DecodeQ(aEncoded, aLen); |
1165 | 0 | else if (aQOrBase64 == 'B') { |
1166 | 0 | decodedText = PL_Base64Decode(aEncoded, aLen, nullptr); |
1167 | 0 | } else { |
1168 | 0 | return NS_ERROR_INVALID_ARG; |
1169 | 0 | } |
1170 | 0 | |
1171 | 0 | if (!decodedText) { |
1172 | 0 | return NS_ERROR_INVALID_ARG; |
1173 | 0 | } |
1174 | 0 | |
1175 | 0 | nsAutoCString utf8Text; |
1176 | 0 | // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset. |
1177 | 0 | nsresult rv = ConvertStringToUTF8(nsDependentCString(decodedText), |
1178 | 0 | aCharset, |
1179 | 0 | IS_7BIT_NON_ASCII_CHARSET(PromiseFlatCString(aCharset).get()), |
1180 | 0 | true, utf8Text); |
1181 | 0 | free(decodedText); |
1182 | 0 | if (NS_FAILED(rv)) { |
1183 | 0 | return rv; |
1184 | 0 | } |
1185 | 0 | aResult.Append(utf8Text); |
1186 | 0 |
|
1187 | 0 | return NS_OK; |
1188 | 0 | } |
1189 | | |
1190 | | static const char especials[] = R"(()<>@,;:\"/[]?.=)"; |
1191 | | |
1192 | | // |decode_mime_part2_str| taken from comi18n.c |
1193 | | // Decode RFC2047-encoded words in the input and convert the result to UTF-8. |
1194 | | // If aOverrideCharset is true, charset in RFC2047-encoded words is |
1195 | | // ignored and aDefaultCharset is assumed, instead. aDefaultCharset |
1196 | | // is also used to convert raw octets (without RFC 2047 encoding) to UTF-8. |
1197 | | //static |
1198 | | nsresult DecodeRFC2047Str(const char *aHeader, const nsACString& aDefaultCharset, |
1199 | | bool aOverrideCharset, nsACString &aResult) |
1200 | 0 | { |
1201 | 0 | const char *p, *q = nullptr, *r; |
1202 | 0 | const char *begin; // tracking pointer for where we are in the input buffer |
1203 | 0 | int32_t isLastEncodedWord = 0; |
1204 | 0 | const char *charsetStart, *charsetEnd; |
1205 | 0 | nsAutoCString prevCharset, curCharset; |
1206 | 0 | nsAutoCString encodedText; |
1207 | 0 | char prevEncoding = '\0', curEncoding; |
1208 | 0 | nsresult rv; |
1209 | 0 |
|
1210 | 0 | begin = aHeader; |
1211 | 0 |
|
1212 | 0 | // To avoid buffer realloc, if possible, set capacity in advance. No |
1213 | 0 | // matter what, more than 3x expansion can never happen for all charsets |
1214 | 0 | // supported by Mozilla. SCSU/BCSU with the sliding window set to a |
1215 | 0 | // non-BMP block may be exceptions, but Mozilla does not support them. |
1216 | 0 | // Neither any known mail/news program use them. Even if there's, we're |
1217 | 0 | // safe because we don't use a raw *char any more. |
1218 | 0 | aResult.SetCapacity(3 * strlen(aHeader)); |
1219 | 0 |
|
1220 | 0 | while ((p = PL_strstr(begin, "=?")) != nullptr) { |
1221 | 0 | if (isLastEncodedWord) { |
1222 | 0 | // See if it's all whitespace. |
1223 | 0 | for (q = begin; q < p; ++q) { |
1224 | 0 | if (!PL_strchr(" \t\r\n", *q)) break; |
1225 | 0 | } |
1226 | 0 | } |
1227 | 0 |
|
1228 | 0 | if (!isLastEncodedWord || q < p) { |
1229 | 0 | if (!encodedText.IsEmpty()) { |
1230 | 0 | rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), |
1231 | 0 | prevEncoding, prevCharset, aResult); |
1232 | 0 | if (NS_FAILED(rv)) { |
1233 | 0 | aResult.Append(encodedText); |
1234 | 0 | } |
1235 | 0 | encodedText.Truncate(); |
1236 | 0 | prevCharset.Truncate(); |
1237 | 0 | prevEncoding = '\0'; |
1238 | 0 | } |
1239 | 0 | // copy the part before the encoded-word |
1240 | 0 | CopyRawHeader(begin, p - begin, aDefaultCharset, aResult); |
1241 | 0 | begin = p; |
1242 | 0 | } |
1243 | 0 |
|
1244 | 0 | p += 2; |
1245 | 0 |
|
1246 | 0 | // Get charset info |
1247 | 0 | charsetStart = p; |
1248 | 0 | charsetEnd = nullptr; |
1249 | 0 | for (q = p; *q != '?'; q++) { |
1250 | 0 | if (*q <= ' ' || PL_strchr(especials, *q)) { |
1251 | 0 | goto badsyntax; |
1252 | 0 | } |
1253 | 0 | |
1254 | 0 | // RFC 2231 section 5 |
1255 | 0 | if (!charsetEnd && *q == '*') { |
1256 | 0 | charsetEnd = q; |
1257 | 0 | } |
1258 | 0 | } |
1259 | 0 | if (!charsetEnd) { |
1260 | 0 | charsetEnd = q; |
1261 | 0 | } |
1262 | 0 |
|
1263 | 0 | q++; |
1264 | 0 | curEncoding = nsCRT::ToUpper(*q); |
1265 | 0 | if (curEncoding != 'Q' && curEncoding != 'B') |
1266 | 0 | goto badsyntax; |
1267 | 0 | |
1268 | 0 | if (q[1] != '?') |
1269 | 0 | goto badsyntax; |
1270 | 0 | |
1271 | 0 | // loop-wise, keep going until we hit "?=". the inner check handles the |
1272 | 0 | // nul terminator should the string terminate before we hit the right |
1273 | 0 | // marker. (And the r[1] will never reach beyond the end of the string |
1274 | 0 | // because *r != '?' is true if r is the nul character.) |
1275 | 0 | for (r = q + 2; *r != '?' || r[1] != '='; r++) { |
1276 | 0 | if (*r < ' ') goto badsyntax; |
1277 | 0 | } |
1278 | 0 | if (r == q + 2) { |
1279 | 0 | // it's empty, skip |
1280 | 0 | begin = r + 2; |
1281 | 0 | isLastEncodedWord = 1; |
1282 | 0 | continue; |
1283 | 0 | } |
1284 | 0 | |
1285 | 0 | curCharset.Assign(charsetStart, charsetEnd - charsetStart); |
1286 | 0 | // Override charset if requested. Never override labeled UTF-8. |
1287 | 0 | // Use default charset instead of UNKNOWN-8BIT |
1288 | 0 | if ((aOverrideCharset && 0 != nsCRT::strcasecmp(curCharset.get(), "UTF-8")) |
1289 | 0 | || (!aDefaultCharset.IsEmpty() && 0 == nsCRT::strcasecmp(curCharset.get(), "UNKNOWN-8BIT")) |
1290 | 0 | ) { |
1291 | 0 | curCharset = aDefaultCharset; |
1292 | 0 | } |
1293 | 0 |
|
1294 | 0 | const char *R; |
1295 | 0 | R = r; |
1296 | 0 | if (curEncoding == 'B') { |
1297 | 0 | // bug 227290. ignore an extraneous '=' at the end. |
1298 | 0 | // (# of characters in B-encoded part has to be a multiple of 4) |
1299 | 0 | int32_t n = r - (q + 2); |
1300 | 0 | R -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0; |
1301 | 0 | } |
1302 | 0 | // Bug 493544. Don't decode the encoded text until it ends |
1303 | 0 | if (R[-1] != '=' |
1304 | 0 | && (prevCharset.IsEmpty() |
1305 | 0 | || (curCharset == prevCharset && curEncoding == prevEncoding)) |
1306 | 0 | ) { |
1307 | 0 | encodedText.Append(q + 2, R - (q + 2)); |
1308 | 0 | prevCharset = curCharset; |
1309 | 0 | prevEncoding = curEncoding; |
1310 | 0 |
|
1311 | 0 | begin = r + 2; |
1312 | 0 | isLastEncodedWord = 1; |
1313 | 0 | continue; |
1314 | 0 | } |
1315 | 0 | |
1316 | 0 | bool bDecoded; // If the current line has been decoded. |
1317 | 0 | bDecoded = false; |
1318 | 0 | if (!encodedText.IsEmpty()) { |
1319 | 0 | if (curCharset == prevCharset && curEncoding == prevEncoding) { |
1320 | 0 | encodedText.Append(q + 2, R - (q + 2)); |
1321 | 0 | bDecoded = true; |
1322 | 0 | } |
1323 | 0 | rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), |
1324 | 0 | prevEncoding, prevCharset, aResult); |
1325 | 0 | if (NS_FAILED(rv)) { |
1326 | 0 | aResult.Append(encodedText); |
1327 | 0 | } |
1328 | 0 | encodedText.Truncate(); |
1329 | 0 | prevCharset.Truncate(); |
1330 | 0 | prevEncoding = '\0'; |
1331 | 0 | } |
1332 | 0 | if (!bDecoded) { |
1333 | 0 | rv = DecodeQOrBase64Str(q + 2, R - (q + 2), curEncoding, |
1334 | 0 | curCharset, aResult); |
1335 | 0 | if (NS_FAILED(rv)) { |
1336 | 0 | aResult.Append(encodedText); |
1337 | 0 | } |
1338 | 0 | } |
1339 | 0 |
|
1340 | 0 | begin = r + 2; |
1341 | 0 | isLastEncodedWord = 1; |
1342 | 0 | continue; |
1343 | 0 |
|
1344 | 0 | badsyntax: |
1345 | 0 | if (!encodedText.IsEmpty()) { |
1346 | 0 | rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), |
1347 | 0 | prevEncoding, prevCharset, aResult); |
1348 | 0 | if (NS_FAILED(rv)) { |
1349 | 0 | aResult.Append(encodedText); |
1350 | 0 | } |
1351 | 0 | encodedText.Truncate(); |
1352 | 0 | prevCharset.Truncate(); |
1353 | 0 | } |
1354 | 0 | // copy the part before the encoded-word |
1355 | 0 | aResult.Append(begin, p - begin); |
1356 | 0 | begin = p; |
1357 | 0 | isLastEncodedWord = 0; |
1358 | 0 | } |
1359 | 0 |
|
1360 | 0 | if (!encodedText.IsEmpty()) { |
1361 | 0 | rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), |
1362 | 0 | prevEncoding, prevCharset, aResult); |
1363 | 0 | if (NS_FAILED(rv)) { |
1364 | 0 | aResult.Append(encodedText); |
1365 | 0 | } |
1366 | 0 | } |
1367 | 0 |
|
1368 | 0 | // put the tail back |
1369 | 0 | CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult); |
1370 | 0 |
|
1371 | 0 | nsAutoCString tempStr(aResult); |
1372 | 0 | tempStr.ReplaceChar('\t', ' '); |
1373 | 0 | aResult = tempStr; |
1374 | 0 |
|
1375 | 0 | return NS_OK; |
1376 | 0 | } |