/src/neomutt/email/rfc2047.c
Line | Count | Source (jump to first uncovered line) |
1 | | /** |
2 | | * @file |
3 | | * RFC2047 MIME extensions encoding / decoding routines |
4 | | * |
5 | | * @authors |
6 | | * Copyright (C) 2018 Federico Kircheis <federico.kircheis@gmail.com> |
7 | | * Copyright (C) 2018-2020 Pietro Cerutti <gahr@gahr.ch> |
8 | | * Copyright (C) 2018-2023 Richard Russon <rich@flatcap.org> |
9 | | * Copyright (C) 2023 Anna Figueiredo Gomes <navi@vlhl.dev> |
10 | | * Copyright (C) 2023 наб <nabijaczleweli@nabijaczleweli.xyz> |
11 | | * |
12 | | * @copyright |
13 | | * This program is free software: you can redistribute it and/or modify it under |
14 | | * the terms of the GNU General Public License as published by the Free Software |
15 | | * Foundation, either version 2 of the License, or (at your option) any later |
16 | | * version. |
17 | | * |
18 | | * This program is distributed in the hope that it will be useful, but WITHOUT |
19 | | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
20 | | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
21 | | * details. |
22 | | * |
23 | | * You should have received a copy of the GNU General Public License along with |
24 | | * this program. If not, see <http://www.gnu.org/licenses/>. |
25 | | */ |
26 | | |
27 | | /** |
28 | | * @page email_rfc2047 RFC2047 encoding / decoding |
29 | | * |
30 | | * RFC2047 MIME extensions encoding / decoding routines. |
31 | | */ |
32 | | |
33 | | #include "config.h" |
34 | | #include <ctype.h> |
35 | | #include <errno.h> |
36 | | #include <iconv.h> |
37 | | #include <stdbool.h> |
38 | | #include <string.h> |
39 | | #include "mutt/lib.h" |
40 | | #include "address/lib.h" |
41 | | #include "config/lib.h" |
42 | | #include "core/lib.h" |
43 | | #include "rfc2047.h" |
44 | | #include "envelope.h" |
45 | | #include "mime.h" |
46 | | |
#define ENCWORD_LEN_MAX 75 ///< Longest encoded word this file will produce
#define ENCWORD_LEN_MIN 9  /* strlen ("=?.?.?.?=") */

/// True for a horizontal space, or the end of the string
#define HSPACE(ch) (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))

/// True if the top two bits of the byte are 10 (UTF-8 continuation byte)
#define CONTINUATION_BYTE(ch) (((ch) & 0xc0) == 0x80)

/**
 * @defgroup encoder_api Mime Encoder API
 *
 * Prototype for an encoding function
 *
 * @param res    Buffer for the result
 * @param src    String to encode
 * @param srclen Length of string to encode
 * @param tocode Character encoding
 * @retval num Bytes written to buffer
 */
typedef size_t (*encoder_t)(char *res, const char *src, size_t srclen, const char *tocode);
66 | | |
/**
 * b_encoder - Base64 Encode a string - Implements ::encoder_t - @ingroup encoder_api
 *
 * Writes `=?<tocode>?B?<base64 data>?=` to res.  The output is not
 * NUL-terminated and no bounds checking is performed on res.
 */
static size_t b_encoder(char *res, const char *src, size_t srclen, const char *tocode)
{
  char *out = res;
  const size_t codelen = strlen(tocode);

  /* Header: "=?" charset "?B?" */
  *out++ = '=';
  *out++ = '?';
  memcpy(out, tocode, codelen);
  out += codelen;
  *out++ = '?';
  *out++ = 'B';
  *out++ = '?';

  /* Body: feed the base64 encoder at most three input bytes at a time */
  for (size_t left = srclen; left > 0;)
  {
    char quad[11] = { 0 };
    const size_t take = MIN(3, left);
    const size_t made = mutt_b64_encode(src, take, quad, sizeof(quad));

    memcpy(out, quad, made);
    out += made;

    src += take;
    left -= take;
  }

  /* Trailer: "?=" */
  *out++ = '?';
  *out++ = '=';

  return out - res;
}
99 | | |
100 | | /** |
101 | | * q_encoder - Quoted-printable Encode a string - Implements ::encoder_t - @ingroup encoder_api |
102 | | */ |
103 | | static size_t q_encoder(char *res, const char *src, size_t srclen, const char *tocode) |
104 | 0 | { |
105 | 0 | static const char hex[] = "0123456789ABCDEF"; |
106 | 0 | char *s0 = res; |
107 | |
|
108 | 0 | memcpy(res, "=?", 2); |
109 | 0 | res += 2; |
110 | 0 | memcpy(res, tocode, strlen(tocode)); |
111 | 0 | res += strlen(tocode); |
112 | 0 | memcpy(res, "?Q?", 3); |
113 | 0 | res += 3; |
114 | 0 | while (srclen--) |
115 | 0 | { |
116 | 0 | unsigned char c = *src++; |
117 | 0 | if (c == ' ') |
118 | 0 | { |
119 | 0 | *res++ = '_'; |
120 | 0 | } |
121 | 0 | else if ((c >= 0x7f) || (c < 0x20) || (c == '_') || strchr(MimeSpecials, c)) |
122 | 0 | { |
123 | 0 | *res++ = '='; |
124 | 0 | *res++ = hex[(c & 0xf0) >> 4]; |
125 | 0 | *res++ = hex[c & 0x0f]; |
126 | 0 | } |
127 | 0 | else |
128 | 0 | { |
129 | 0 | *res++ = c; |
130 | 0 | } |
131 | 0 | } |
132 | 0 | memcpy(res, "?=", 2); |
133 | 0 | res += 2; |
134 | 0 | return res - s0; |
135 | 0 | } |
136 | | |
137 | | /** |
138 | | * parse_encoded_word - Parse a string and report RFC2047 elements |
139 | | * @param[in] str String to parse |
140 | | * @param[out] enc Content encoding found in the first RFC2047 word |
141 | | * @param[out] charset Charset found in the first RFC2047 word |
142 | | * @param[out] charsetlen Length of the charset string found |
143 | | * @param[out] text Start of the first RFC2047 encoded text |
144 | | * @param[out] textlen Length of the encoded text found |
145 | | * @retval ptr Start of the RFC2047 encoded word |
146 | | * @retval NULL None was found |
147 | | */ |
148 | | static char *parse_encoded_word(char *str, enum ContentEncoding *enc, char **charset, |
149 | | size_t *charsetlen, char **text, size_t *textlen) |
150 | 1.48M | { |
151 | 1.48M | regmatch_t *match = mutt_prex_capture(PREX_RFC2047_ENCODED_WORD, str); |
152 | 1.48M | if (!match) |
153 | 1.48M | return NULL; |
154 | | |
155 | 0 | const regmatch_t *mfull = &match[PREX_RFC2047_ENCODED_WORD_MATCH_FULL]; |
156 | 0 | const regmatch_t *mcharset = &match[PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET]; |
157 | 0 | const regmatch_t *mencoding = &match[PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING]; |
158 | 0 | const regmatch_t *mtext = &match[PREX_RFC2047_ENCODED_WORD_MATCH_TEXT]; |
159 | | |
160 | | /* Charset */ |
161 | 0 | *charset = str + mutt_regmatch_start(mcharset); |
162 | 0 | *charsetlen = mutt_regmatch_len(mcharset); |
163 | | |
164 | | /* Encoding: either Q or B */ |
165 | 0 | *enc = (tolower(str[mutt_regmatch_start(mencoding)]) == 'q') ? ENC_QUOTED_PRINTABLE : ENC_BASE64; |
166 | |
|
167 | 0 | *text = str + mutt_regmatch_start(mtext); |
168 | 0 | *textlen = mutt_regmatch_len(mtext); |
169 | 0 | return str + mutt_regmatch_start(mfull); |
170 | 1.48M | } |
171 | | |
172 | | /** |
173 | | * try_block - Attempt to convert a block of text |
174 | | * @param d String to convert |
175 | | * @param dlen Length of string |
176 | | * @param fromcode Original encoding |
177 | | * @param tocode New encoding |
178 | | * @param encoder Encoding function |
179 | | * @param wlen Number of characters converted |
180 | | * @retval 0 Success, string converted |
181 | | * @retval >0 Error, number of bytes that could be converted |
182 | | * |
183 | | * If the data could be converted using encoder, then set *encoder and *wlen. |
184 | | * Otherwise return an upper bound on the maximum length of the data which |
185 | | * could be converted. |
186 | | * |
187 | | * The data is converted from fromcode (which must be stateless) to tocode, |
188 | | * unless fromcode is NULL, in which case the data is assumed to be already in |
189 | | * tocode, which should be 8-bit and stateless. |
190 | | */ |
191 | | static size_t try_block(const char *d, size_t dlen, const char *fromcode, |
192 | | const char *tocode, encoder_t *encoder, size_t *wlen) |
193 | 0 | { |
194 | 0 | char buf[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1]; |
195 | 0 | const char *ib = NULL; |
196 | 0 | char *ob = NULL; |
197 | 0 | size_t ibl, obl; |
198 | 0 | int count, len, len_b, len_q; |
199 | |
|
200 | 0 | if (fromcode) |
201 | 0 | { |
202 | 0 | iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS); |
203 | 0 | ASSERT(iconv_t_valid(cd)); |
204 | 0 | ib = d; |
205 | 0 | ibl = dlen; |
206 | 0 | ob = buf; |
207 | 0 | obl = sizeof(buf) - strlen(tocode); |
208 | 0 | if ((iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl) == ICONV_ILLEGAL_SEQ) || |
209 | 0 | (iconv(cd, NULL, NULL, &ob, &obl) == ICONV_ILLEGAL_SEQ)) |
210 | 0 | { |
211 | 0 | ASSERT(errno == E2BIG); |
212 | 0 | ASSERT(ib > d); |
213 | 0 | return ((ib - d) == dlen) ? dlen : ib - d + 1; |
214 | 0 | } |
215 | 0 | } |
216 | 0 | else |
217 | 0 | { |
218 | 0 | if (dlen > (sizeof(buf) - strlen(tocode))) |
219 | 0 | return sizeof(buf) - strlen(tocode) + 1; |
220 | 0 | memcpy(buf, d, dlen); |
221 | 0 | ob = buf + dlen; |
222 | 0 | } |
223 | | |
224 | 0 | count = 0; |
225 | 0 | for (char *p = buf; p < ob; p++) |
226 | 0 | { |
227 | 0 | unsigned char c = *p; |
228 | 0 | ASSERT(strchr(MimeSpecials, '?')); |
229 | 0 | if ((c >= 0x7f) || (c < 0x20) || (*p == '_') || |
230 | 0 | ((c != ' ') && strchr(MimeSpecials, *p))) |
231 | 0 | { |
232 | 0 | count++; |
233 | 0 | } |
234 | 0 | } |
235 | | |
236 | 0 | len = ENCWORD_LEN_MIN - 2 + strlen(tocode); |
237 | 0 | len_b = len + (((ob - buf) + 2) / 3) * 4; |
238 | 0 | len_q = len + (ob - buf) + 2 * count; |
239 | | |
240 | | /* Apparently RFC1468 says to use B encoding for iso-2022-jp. */ |
241 | 0 | if (mutt_istr_equal(tocode, "ISO-2022-JP")) |
242 | 0 | len_q = ENCWORD_LEN_MAX + 1; |
243 | |
|
244 | 0 | if ((len_b < len_q) && (len_b <= ENCWORD_LEN_MAX)) |
245 | 0 | { |
246 | 0 | *encoder = b_encoder; |
247 | 0 | *wlen = len_b; |
248 | 0 | return 0; |
249 | 0 | } |
250 | 0 | else if (len_q <= ENCWORD_LEN_MAX) |
251 | 0 | { |
252 | 0 | *encoder = q_encoder; |
253 | 0 | *wlen = len_q; |
254 | 0 | return 0; |
255 | 0 | } |
256 | 0 | else |
257 | 0 | { |
258 | 0 | return dlen; |
259 | 0 | } |
260 | 0 | } |
261 | | |
262 | | /** |
263 | | * encode_block - Encode a block of text using an encoder |
264 | | * @param str String to convert |
265 | | * @param buf Buffer for result |
266 | | * @param buflen Buffer length |
267 | | * @param fromcode Original encoding |
268 | | * @param tocode New encoding |
269 | | * @param encoder Encoding function |
270 | | * @retval num Length of the encoded word |
271 | | * |
272 | | * Encode the data (buf, buflen) into str using the encoder. |
273 | | */ |
274 | | static size_t encode_block(char *str, char *buf, size_t buflen, const char *fromcode, |
275 | | const char *tocode, encoder_t encoder) |
276 | 0 | { |
277 | 0 | if (!fromcode) |
278 | 0 | { |
279 | 0 | return (*encoder)(str, buf, buflen, tocode); |
280 | 0 | } |
281 | | |
282 | 0 | const iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS); |
283 | 0 | ASSERT(iconv_t_valid(cd)); |
284 | 0 | const char *ib = buf; |
285 | 0 | size_t ibl = buflen; |
286 | 0 | char tmp[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1]; |
287 | 0 | char *ob = tmp; |
288 | 0 | size_t obl = sizeof(tmp) - strlen(tocode); |
289 | 0 | const size_t n1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl); |
290 | 0 | const size_t n2 = iconv(cd, NULL, NULL, &ob, &obl); |
291 | 0 | ASSERT((n1 != ICONV_ILLEGAL_SEQ) && (n2 != ICONV_ILLEGAL_SEQ)); |
292 | 0 | return (*encoder)(str, tmp, ob - tmp, tocode); |
293 | 0 | } |
294 | | |
295 | | /** |
296 | | * choose_block - Calculate how much data can be converted |
297 | | * @param d String to convert |
298 | | * @param dlen Length of string |
299 | | * @param col Starting column to convert |
300 | | * @param fromcode Original encoding |
301 | | * @param tocode New encoding |
302 | | * @param encoder Encoding function |
303 | | * @param wlen Number of characters converted |
304 | | * @retval num Bytes that can be converted |
305 | | * |
306 | | * Discover how much of the data (d, dlen) can be converted into a single |
307 | | * encoded word. Return how much data can be converted, and set the length |
308 | | * *wlen of the encoded word and *encoder. We start in column col, which |
309 | | * limits the length of the word. |
310 | | */ |
311 | | static size_t choose_block(char *d, size_t dlen, int col, const char *fromcode, |
312 | | const char *tocode, encoder_t *encoder, size_t *wlen) |
313 | 0 | { |
314 | 0 | const bool utf8 = fromcode && mutt_istr_equal(fromcode, "utf-8"); |
315 | |
|
316 | 0 | size_t n = dlen; |
317 | 0 | while (true) |
318 | 0 | { |
319 | 0 | ASSERT(n > 0); |
320 | 0 | const size_t nn = try_block(d, n, fromcode, tocode, encoder, wlen); |
321 | 0 | if ((nn == 0) && (((col + *wlen) <= (ENCWORD_LEN_MAX + 1)) || (n <= 1))) |
322 | 0 | break; |
323 | 0 | n = ((nn != 0) ? nn : n) - 1; |
324 | 0 | ASSERT(n > 0); |
325 | 0 | if (utf8) |
326 | 0 | while ((n > 1) && CONTINUATION_BYTE(d[n])) |
327 | 0 | n--; |
328 | 0 | } |
329 | 0 | return n; |
330 | 0 | } |
331 | | |
332 | | /** |
333 | | * finalize_chunk - Perform charset conversion and filtering |
334 | | * @param[out] res Buffer where the resulting string is appended |
335 | | * @param[in] buf Buffer with the input string |
336 | | * @param[in] charset Charset to use for the conversion |
337 | | * @param[in] charsetlen Length of the charset parameter |
338 | | * |
339 | | * The buffer buf is reinitialized at the end of this function. |
340 | | */ |
341 | | static void finalize_chunk(struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen) |
342 | 0 | { |
343 | 0 | if (!charset) |
344 | 0 | return; |
345 | 0 | char end = charset[charsetlen]; |
346 | 0 | charset[charsetlen] = '\0'; |
347 | 0 | mutt_ch_convert_string(&buf->data, charset, cc_charset(), MUTT_ICONV_HOOK_FROM); |
348 | 0 | charset[charsetlen] = end; |
349 | 0 | mutt_mb_filter_unprintable(&buf->data); |
350 | 0 | buf_addstr(res, buf->data); |
351 | 0 | FREE(&buf->data); |
352 | 0 | buf_init(buf); |
353 | 0 | } |
354 | | |
355 | | /** |
356 | | * decode_word - Decode an RFC2047-encoded string |
357 | | * @param s String to decode |
358 | | * @param len Length of the string |
359 | | * @param enc Encoding type |
360 | | * @retval ptr Decoded string |
361 | | * |
362 | | * @note The input string must be NUL-terminated; the len parameter is |
363 | | * an optimization. The caller must free the returned string. |
364 | | */ |
365 | | static char *decode_word(const char *s, size_t len, enum ContentEncoding enc) |
366 | 0 | { |
367 | 0 | const char *it = s; |
368 | 0 | const char *end = s + len; |
369 | |
|
370 | 0 | ASSERT(*end == '\0'); |
371 | |
|
372 | 0 | if (enc == ENC_QUOTED_PRINTABLE) |
373 | 0 | { |
374 | 0 | struct Buffer *buf = buf_pool_get(); |
375 | 0 | for (; it < end; it++) |
376 | 0 | { |
377 | 0 | if (*it == '_') |
378 | 0 | { |
379 | 0 | buf_addch(buf, ' '); |
380 | 0 | } |
381 | 0 | else if ((it[0] == '=') && (!(it[1] & ~127) && (hexval(it[1]) != -1)) && |
382 | 0 | (!(it[2] & ~127) && (hexval(it[2]) != -1))) |
383 | 0 | { |
384 | 0 | buf_addch(buf, (hexval(it[1]) << 4) | hexval(it[2])); |
385 | 0 | it += 2; |
386 | 0 | } |
387 | 0 | else |
388 | 0 | { |
389 | 0 | buf_addch(buf, *it); |
390 | 0 | } |
391 | 0 | } |
392 | 0 | char *str = buf_strdup(buf); |
393 | 0 | buf_pool_release(&buf); |
394 | 0 | return str; |
395 | 0 | } |
396 | 0 | else if (enc == ENC_BASE64) |
397 | 0 | { |
398 | 0 | const int olen = 3 * len / 4 + 1; |
399 | 0 | char *out = MUTT_MEM_MALLOC(olen, char); |
400 | 0 | int dlen = mutt_b64_decode(it, out, olen); |
401 | 0 | if (dlen == -1) |
402 | 0 | { |
403 | 0 | FREE(&out); |
404 | 0 | return NULL; |
405 | 0 | } |
406 | 0 | out[dlen] = '\0'; |
407 | 0 | return out; |
408 | 0 | } |
409 | | |
410 | 0 | ASSERT(0); /* The enc parameter has an invalid value */ |
411 | 0 | return NULL; |
412 | 0 | } |
413 | | |
414 | | /** |
415 | | * encode - RFC2047-encode a string |
416 | | * @param[in] d String to convert |
417 | | * @param[in] dlen Length of string |
418 | | * @param[in] col Starting column to convert |
419 | | * @param[in] fromcode Original encoding |
420 | | * @param[in] charsets List of allowable encodings (colon separated) |
421 | | * @param[out] e Encoded string |
422 | | * @param[out] elen Length of encoded string |
423 | | * @param[in] specials Special characters to be encoded |
424 | | * @retval 0 Success |
425 | | */ |
426 | | static int encode(const char *d, size_t dlen, int col, const char *fromcode, |
427 | | const struct Slist *charsets, char **e, size_t *elen, const char *specials) |
428 | 0 | { |
429 | 0 | int rc = 0; |
430 | 0 | char *buf = NULL; |
431 | 0 | size_t bufpos, buflen; |
432 | 0 | char *t0 = NULL, *t1 = NULL, *t = NULL; |
433 | 0 | char *s0 = NULL, *s1 = NULL; |
434 | 0 | size_t ulen, r, wlen = 0; |
435 | 0 | encoder_t encoder = NULL; |
436 | 0 | char *tocode1 = NULL; |
437 | 0 | const char *tocode = NULL; |
438 | 0 | const char *icode = "utf-8"; |
439 | | |
440 | | /* Try to convert to UTF-8. */ |
441 | 0 | char *u = mutt_strn_dup(d, dlen); |
442 | 0 | if (mutt_ch_convert_string(&u, fromcode, icode, MUTT_ICONV_NO_FLAGS) != 0) |
443 | 0 | { |
444 | 0 | rc = 1; |
445 | 0 | icode = 0; |
446 | 0 | } |
447 | 0 | ulen = mutt_str_len(u); |
448 | | |
449 | | /* Find earliest and latest things we must encode. */ |
450 | 0 | s0 = 0; |
451 | 0 | s1 = 0; |
452 | 0 | t0 = 0; |
453 | 0 | t1 = 0; |
454 | 0 | for (t = u; t < (u + ulen); t++) |
455 | 0 | { |
456 | 0 | if ((*t & 0x80) || ((*t == '=') && (t[1] == '?') && ((t == u) || HSPACE(*(t - 1))))) |
457 | 0 | { |
458 | 0 | if (!t0) |
459 | 0 | t0 = t; |
460 | 0 | t1 = t; |
461 | 0 | } |
462 | 0 | else if (specials && *t && strchr(specials, *t)) |
463 | 0 | { |
464 | 0 | if (!s0) |
465 | 0 | s0 = t; |
466 | 0 | s1 = t; |
467 | 0 | } |
468 | 0 | } |
469 | | |
470 | | /* If we have something to encode, include RFC822 specials */ |
471 | 0 | if (t0 && s0 && (s0 < t0)) |
472 | 0 | t0 = s0; |
473 | 0 | if (t1 && s1 && (s1 > t1)) |
474 | 0 | t1 = s1; |
475 | |
|
476 | 0 | if (!t0) |
477 | 0 | { |
478 | | /* No encoding is required. */ |
479 | 0 | *e = u; |
480 | 0 | *elen = ulen; |
481 | 0 | return rc; |
482 | 0 | } |
483 | | |
484 | | /* Choose target charset. */ |
485 | 0 | tocode = fromcode; |
486 | 0 | if (icode) |
487 | 0 | { |
488 | 0 | tocode1 = mutt_ch_choose(icode, charsets, u, ulen, 0, 0); |
489 | 0 | if (tocode1) |
490 | 0 | { |
491 | 0 | tocode = tocode1; |
492 | 0 | } |
493 | 0 | else |
494 | 0 | { |
495 | 0 | rc = 2; |
496 | 0 | icode = 0; |
497 | 0 | } |
498 | 0 | } |
499 | | |
500 | | /* Hack to avoid labelling 8-bit data as us-ascii. */ |
501 | 0 | if (!icode && mutt_ch_is_us_ascii(tocode)) |
502 | 0 | tocode = "unknown-8bit"; |
503 | | |
504 | | /* Adjust t0 for maximum length of line. */ |
505 | 0 | t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN; |
506 | 0 | if (t < u) |
507 | 0 | t = u; |
508 | 0 | if (t < t0) |
509 | 0 | t0 = t; |
510 | | |
511 | | /* Adjust t0 until we can encode a character after a space. */ |
512 | 0 | for (; t0 > u; t0--) |
513 | 0 | { |
514 | 0 | if (!HSPACE(*(t0 - 1))) |
515 | 0 | continue; |
516 | 0 | t = t0 + 1; |
517 | 0 | if (icode) |
518 | 0 | while ((t < (u + ulen)) && CONTINUATION_BYTE(*t)) |
519 | 0 | t++; |
520 | 0 | if ((try_block(t0, t - t0, icode, tocode, &encoder, &wlen) == 0) && |
521 | 0 | ((col + (t0 - u) + wlen) <= (ENCWORD_LEN_MAX + 1))) |
522 | 0 | { |
523 | 0 | break; |
524 | 0 | } |
525 | 0 | } |
526 | | |
527 | | /* Adjust t1 until we can encode a character before a space. */ |
528 | 0 | for (; t1 < (u + ulen); t1++) |
529 | 0 | { |
530 | 0 | if (!HSPACE(*t1)) |
531 | 0 | continue; |
532 | 0 | t = t1 - 1; |
533 | 0 | if (icode) |
534 | 0 | while (CONTINUATION_BYTE(*t)) |
535 | 0 | t--; |
536 | 0 | if ((try_block(t, t1 - t, icode, tocode, &encoder, &wlen) == 0) && |
537 | 0 | ((1 + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1))) |
538 | 0 | { |
539 | 0 | break; |
540 | 0 | } |
541 | 0 | } |
542 | | |
543 | | /* We shall encode the region [t0,t1). */ |
544 | | |
545 | | /* Initialise the output buffer with the us-ascii prefix. */ |
546 | 0 | buflen = 2 * ulen; |
547 | 0 | buf = MUTT_MEM_MALLOC(buflen, char); |
548 | 0 | bufpos = t0 - u; |
549 | 0 | memcpy(buf, u, t0 - u); |
550 | |
|
551 | 0 | col += t0 - u; |
552 | |
|
553 | 0 | t = t0; |
554 | 0 | while (true) |
555 | 0 | { |
556 | | /* Find how much we can encode. */ |
557 | 0 | size_t n = choose_block(t, t1 - t, col, icode, tocode, &encoder, &wlen); |
558 | 0 | if (n == (t1 - t)) |
559 | 0 | { |
560 | | /* See if we can fit the us-ascii suffix, too. */ |
561 | 0 | if ((col + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1)) |
562 | 0 | break; |
563 | 0 | n = t1 - t - 1; |
564 | 0 | if (icode) |
565 | 0 | while (CONTINUATION_BYTE(t[n])) |
566 | 0 | n--; |
567 | 0 | if (n == 0) |
568 | 0 | { |
569 | | /* This should only happen in the really stupid case where the |
570 | | * only word that needs encoding is one character long, but |
571 | | * there is too much us-ascii stuff after it to use a single |
572 | | * encoded word. We add the next word to the encoded region |
573 | | * and try again. */ |
574 | 0 | ASSERT(t1 < (u + ulen)); |
575 | 0 | for (t1++; (t1 < (u + ulen)) && !HSPACE(*t1); t1++) |
576 | 0 | ; // do nothing |
577 | |
|
578 | 0 | continue; |
579 | 0 | } |
580 | 0 | n = choose_block(t, n, col, icode, tocode, &encoder, &wlen); |
581 | 0 | } |
582 | | |
583 | | /* Add to output buffer. */ |
584 | 0 | const char *line_break = "\n\t"; |
585 | 0 | const int lb_len = 2; /* strlen(line_break) */ |
586 | |
|
587 | 0 | if ((bufpos + wlen + lb_len) > buflen) |
588 | 0 | { |
589 | 0 | buflen = bufpos + wlen + lb_len; |
590 | 0 | MUTT_MEM_REALLOC(&buf, buflen, char); |
591 | 0 | } |
592 | 0 | r = encode_block(buf + bufpos, t, n, icode, tocode, encoder); |
593 | 0 | ASSERT(r == wlen); |
594 | 0 | bufpos += wlen; |
595 | 0 | memcpy(buf + bufpos, line_break, lb_len); |
596 | 0 | bufpos += lb_len; |
597 | |
|
598 | 0 | col = 1; |
599 | |
|
600 | 0 | t += n; |
601 | 0 | } |
602 | | |
603 | | /* Add last encoded word and us-ascii suffix to buffer. */ |
604 | 0 | buflen = bufpos + wlen + (u + ulen - t1); |
605 | 0 | MUTT_MEM_REALLOC(&buf, buflen + 1, char); |
606 | 0 | r = encode_block(buf + bufpos, t, t1 - t, icode, tocode, encoder); |
607 | 0 | ASSERT(r == wlen); |
608 | 0 | bufpos += wlen; |
609 | 0 | memcpy(buf + bufpos, t1, u + ulen - t1); |
610 | |
|
611 | 0 | FREE(&tocode1); |
612 | 0 | FREE(&u); |
613 | |
|
614 | 0 | buf[buflen] = '\0'; |
615 | |
|
616 | 0 | *e = buf; |
617 | 0 | *elen = buflen + 1; |
618 | 0 | return rc; |
619 | 0 | } |
620 | | |
621 | | /** |
622 | | * rfc2047_encode - RFC-2047-encode a string |
623 | | * @param[in,out] pd String to be encoded, and resulting encoded string |
624 | | * @param[in] specials Special characters to be encoded |
625 | | * @param[in] col Starting index in string |
626 | | * @param[in] charsets List of charsets to choose from |
627 | | */ |
628 | | void rfc2047_encode(char **pd, const char *specials, int col, const struct Slist *charsets) |
629 | 0 | { |
630 | 0 | if (!pd || !*pd) |
631 | 0 | return; |
632 | | |
633 | 0 | const char *const c_charset = cc_charset(); |
634 | 0 | if (!c_charset) |
635 | 0 | return; |
636 | | |
637 | 0 | struct Slist *fallback = NULL; |
638 | 0 | if (!charsets) |
639 | 0 | { |
640 | 0 | fallback = slist_parse("utf-8", D_SLIST_SEP_COLON); |
641 | 0 | charsets = fallback; |
642 | 0 | } |
643 | |
|
644 | 0 | char *e = NULL; |
645 | 0 | size_t elen = 0; |
646 | 0 | encode(*pd, strlen(*pd), col, c_charset, charsets, &e, &elen, specials); |
647 | |
|
648 | 0 | slist_free(&fallback); |
649 | 0 | FREE(pd); |
650 | 0 | *pd = e; |
651 | 0 | } |
652 | | |
653 | | /** |
654 | | * rfc2047_decode - Decode any RFC2047-encoded header fields |
655 | | * @param[in,out] pd String to be decoded, and resulting decoded string |
656 | | * |
657 | | * Try to decode anything that looks like a valid RFC2047 encoded header field, |
658 | | * ignoring RFC822 parsing rules. If decoding fails, for example due to an |
659 | | * invalid base64 string, the original input is left untouched. |
660 | | */ |
661 | | void rfc2047_decode(char **pd) |
662 | 1.73M | { |
663 | 1.73M | if (!pd || !*pd) |
664 | 243k | return; |
665 | | |
666 | 1.48M | struct Buffer *buf = buf_pool_get(); // Output buffer |
667 | 1.48M | char *s = *pd; // Read pointer |
668 | 1.48M | char *beg = NULL; // Begin of encoded word |
669 | 1.48M | enum ContentEncoding enc = ENC_OTHER; // ENC_BASE64 or ENC_QUOTED_PRINTABLE |
670 | 1.48M | char *charset = NULL; // Which charset |
671 | 1.48M | size_t charsetlen; // Length of the charset |
672 | 1.48M | char *text = NULL; // Encoded text |
673 | 1.48M | size_t textlen = 0; // Length of encoded text |
674 | | |
675 | | /* Keep some state in case the next decoded word is using the same charset |
676 | | * and it happens to be split in the middle of a multibyte character. |
677 | | * See https://github.com/neomutt/neomutt/issues/1015 */ |
678 | 1.48M | struct Buffer *prev = buf_pool_get(); /* Previously decoded word */ |
679 | 1.48M | char *prev_charset = NULL; /* Previously used charset */ |
680 | 1.48M | size_t prev_charsetlen = 0; /* Length of the previously used charset */ |
681 | | |
682 | 1.48M | const struct Slist *c_assumed_charset = cc_assumed_charset(); |
683 | 1.48M | const char *c_charset = cc_charset(); |
684 | 2.97M | while (*s) |
685 | 1.48M | { |
686 | 1.48M | beg = parse_encoded_word(s, &enc, &charset, &charsetlen, &text, &textlen); |
687 | 1.48M | if (beg != s) |
688 | 1.48M | { |
689 | | /* Some non-encoded text was found */ |
690 | 1.48M | size_t holelen = beg ? beg - s : mutt_str_len(s); |
691 | | |
692 | | /* Ignore whitespace between encoded words */ |
693 | 1.48M | if (beg && (mutt_str_lws_len(s, holelen) == holelen)) |
694 | 0 | { |
695 | 0 | s = beg; |
696 | 0 | continue; |
697 | 0 | } |
698 | | |
699 | | /* If we have some previously decoded text, add it now */ |
700 | 1.48M | if (!buf_is_empty(prev)) |
701 | 0 | { |
702 | 0 | finalize_chunk(buf, prev, prev_charset, prev_charsetlen); |
703 | 0 | } |
704 | | |
705 | | /* Add non-encoded part */ |
706 | 1.48M | if (slist_is_empty(c_assumed_charset)) |
707 | 1.48M | { |
708 | 1.48M | buf_addstr_n(buf, s, holelen); |
709 | 1.48M | } |
710 | 0 | else |
711 | 0 | { |
712 | 0 | char *conv = mutt_strn_dup(s, holelen); |
713 | 0 | mutt_ch_convert_nonmime_string(c_assumed_charset, c_charset, &conv); |
714 | 0 | buf_addstr(buf, conv); |
715 | 0 | FREE(&conv); |
716 | 0 | } |
717 | 1.48M | s += holelen; |
718 | 1.48M | } |
719 | 1.48M | if (beg) |
720 | 0 | { |
721 | | /* Some encoded text was found */ |
722 | 0 | text[textlen] = '\0'; |
723 | 0 | char *decoded = decode_word(text, textlen, enc); |
724 | 0 | if (!decoded) |
725 | 0 | { |
726 | 0 | goto done; |
727 | 0 | } |
728 | 0 | if (!buf_is_empty(prev) && ((prev_charsetlen != charsetlen) || |
729 | 0 | !mutt_strn_equal(prev_charset, charset, charsetlen))) |
730 | 0 | { |
731 | | /* Different charset, convert the previous chunk and add it to the |
732 | | * final result */ |
733 | 0 | finalize_chunk(buf, prev, prev_charset, prev_charsetlen); |
734 | 0 | } |
735 | |
|
736 | 0 | buf_addstr(prev, decoded); |
737 | 0 | FREE(&decoded); |
738 | 0 | prev_charset = charset; |
739 | 0 | prev_charsetlen = charsetlen; |
740 | 0 | s = text + textlen + 2; /* Skip final ?= */ |
741 | 0 | } |
742 | 1.48M | } |
743 | | |
744 | | /* Save the last chunk */ |
745 | 1.48M | if (!buf_is_empty(prev)) |
746 | 0 | { |
747 | 0 | finalize_chunk(buf, prev, prev_charset, prev_charsetlen); |
748 | 0 | } |
749 | | |
750 | 1.48M | FREE(pd); |
751 | 1.48M | *pd = buf_strdup(buf); |
752 | | |
753 | 1.48M | done: |
754 | 1.48M | buf_pool_release(&buf); |
755 | 1.48M | buf_pool_release(&prev); |
756 | 1.48M | } |
757 | | |
758 | | /** |
759 | | * rfc2047_encode_addrlist - Encode any RFC2047 headers, where required, in an Address list |
760 | | * @param al AddressList |
761 | | * @param tag Header tag (used for wrapping calculation) |
762 | | * |
763 | | * @note rfc2047_encode() may realloc the data pointer it's given, |
764 | | * so work on a copy to avoid breaking the Buffer |
765 | | */ |
766 | | void rfc2047_encode_addrlist(struct AddressList *al, const char *tag) |
767 | 0 | { |
768 | 0 | if (!al) |
769 | 0 | return; |
770 | | |
771 | 0 | int col = tag ? strlen(tag) + 2 : 32; |
772 | 0 | struct Address *a = NULL; |
773 | 0 | char *data = NULL; |
774 | 0 | const struct Slist *const c_send_charset = cs_subset_slist(NeoMutt->sub, "send_charset"); |
775 | 0 | TAILQ_FOREACH(a, al, entries) |
776 | 0 | { |
777 | 0 | if (a->personal) |
778 | 0 | { |
779 | 0 | data = buf_strdup(a->personal); |
780 | 0 | rfc2047_encode(&data, AddressSpecials, col, c_send_charset); |
781 | 0 | buf_strcpy(a->personal, data); |
782 | 0 | FREE(&data); |
783 | 0 | } |
784 | 0 | else if (a->group && a->mailbox) |
785 | 0 | { |
786 | 0 | data = buf_strdup(a->mailbox); |
787 | 0 | rfc2047_encode(&data, AddressSpecials, col, c_send_charset); |
788 | 0 | buf_strcpy(a->mailbox, data); |
789 | 0 | FREE(&data); |
790 | 0 | } |
791 | 0 | } |
792 | 0 | } |
793 | | |
794 | | /** |
795 | | * rfc2047_decode_addrlist - Decode any RFC2047 headers in an Address list |
796 | | * @param al AddressList |
797 | | * |
798 | | * @note rfc2047_decode() may realloc the data pointer it's given, |
799 | | * so work on a copy to avoid breaking the Buffer |
800 | | */ |
801 | | void rfc2047_decode_addrlist(struct AddressList *al) |
802 | 980k | { |
803 | 980k | if (!al) |
804 | 0 | return; |
805 | | |
806 | 980k | const bool assumed = !slist_is_empty(cc_assumed_charset()); |
807 | 980k | struct Address *a = NULL; |
808 | 980k | char *data = NULL; |
809 | 980k | TAILQ_FOREACH(a, al, entries) |
810 | 74.2k | { |
811 | 74.2k | if (a->personal && ((buf_find_string(a->personal, "=?")) || assumed)) |
812 | 201 | { |
813 | 201 | data = buf_strdup(a->personal); |
814 | 201 | rfc2047_decode(&data); |
815 | 201 | buf_strcpy(a->personal, data); |
816 | 201 | FREE(&data); |
817 | 201 | } |
818 | 74.0k | else if (a->group && a->mailbox && buf_find_string(a->mailbox, "=?")) |
819 | 3.42k | { |
820 | 3.42k | data = buf_strdup(a->mailbox); |
821 | 3.42k | rfc2047_decode(&data); |
822 | 3.42k | buf_strcpy(a->mailbox, data); |
823 | 3.42k | FREE(&data); |
824 | 3.42k | } |
825 | 74.2k | } |
826 | 980k | } |
827 | | |
828 | | /** |
829 | | * rfc2047_decode_envelope - Decode the fields of an Envelope |
830 | | * @param env Envelope |
831 | | */ |
832 | | void rfc2047_decode_envelope(struct Envelope *env) |
833 | 122k | { |
834 | 122k | if (!env) |
835 | 0 | return; |
836 | 122k | rfc2047_decode_addrlist(&env->from); |
837 | 122k | rfc2047_decode_addrlist(&env->to); |
838 | 122k | rfc2047_decode_addrlist(&env->cc); |
839 | 122k | rfc2047_decode_addrlist(&env->bcc); |
840 | 122k | rfc2047_decode_addrlist(&env->reply_to); |
841 | 122k | rfc2047_decode_addrlist(&env->mail_followup_to); |
842 | 122k | rfc2047_decode_addrlist(&env->return_path); |
843 | 122k | rfc2047_decode_addrlist(&env->sender); |
844 | 122k | rfc2047_decode(&env->x_label); |
845 | | |
846 | 122k | char *subj = env->subject; |
847 | 122k | *(char **) &env->subject = NULL; |
848 | 122k | rfc2047_decode(&subj); |
849 | 122k | mutt_env_set_subject(env, subj); |
850 | 122k | FREE(&subj); |
851 | 122k | } |
852 | | |
853 | | /** |
854 | | * rfc2047_encode_envelope - Encode the fields of an Envelope |
855 | | * @param env Envelope |
856 | | */ |
857 | | void rfc2047_encode_envelope(struct Envelope *env) |
858 | 0 | { |
859 | 0 | if (!env) |
860 | 0 | return; |
861 | 0 | rfc2047_encode_addrlist(&env->from, "From"); |
862 | 0 | rfc2047_encode_addrlist(&env->to, "To"); |
863 | 0 | rfc2047_encode_addrlist(&env->cc, "Cc"); |
864 | 0 | rfc2047_encode_addrlist(&env->bcc, "Bcc"); |
865 | 0 | rfc2047_encode_addrlist(&env->reply_to, "Reply-To"); |
866 | 0 | rfc2047_encode_addrlist(&env->mail_followup_to, "Mail-Followup-To"); |
867 | 0 | rfc2047_encode_addrlist(&env->sender, "Sender"); |
868 | 0 | const struct Slist *const c_send_charset = cs_subset_slist(NeoMutt->sub, "send_charset"); |
869 | 0 | rfc2047_encode(&env->x_label, NULL, sizeof("X-Label:"), c_send_charset); |
870 | |
|
871 | 0 | char *subj = env->subject; |
872 | 0 | *(char **) &env->subject = NULL; |
873 | 0 | rfc2047_encode(&subj, NULL, sizeof("Subject:"), c_send_charset); |
874 | 0 | mutt_env_set_subject(env, subj); |
875 | 0 | FREE(&subj); |
876 | 0 | } |