Coverage Report

Created: 2025-03-11 06:49

/src/neomutt/email/rfc2047.c
Line
Count
Source (jump to first uncovered line)
1
/**
2
 * @file
3
 * RFC2047 MIME extensions encoding / decoding routines
4
 *
5
 * @authors
6
 * Copyright (C) 2018 Federico Kircheis <federico.kircheis@gmail.com>
7
 * Copyright (C) 2018-2020 Pietro Cerutti <gahr@gahr.ch>
8
 * Copyright (C) 2018-2023 Richard Russon <rich@flatcap.org>
9
 * Copyright (C) 2023 Anna Figueiredo Gomes <navi@vlhl.dev>
10
 * Copyright (C) 2023 наб <nabijaczleweli@nabijaczleweli.xyz>
11
 *
12
 * @copyright
13
 * This program is free software: you can redistribute it and/or modify it under
14
 * the terms of the GNU General Public License as published by the Free Software
15
 * Foundation, either version 2 of the License, or (at your option) any later
16
 * version.
17
 *
18
 * This program is distributed in the hope that it will be useful, but WITHOUT
19
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
20
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
21
 * details.
22
 *
23
 * You should have received a copy of the GNU General Public License along with
24
 * this program.  If not, see <http://www.gnu.org/licenses/>.
25
 */
26
27
/**
28
 * @page email_rfc2047 RFC2047 encoding / decoding
29
 *
30
 * RFC2047 MIME extensions encoding / decoding routines.
31
 */
32
33
#include "config.h"
34
#include <ctype.h>
35
#include <errno.h>
36
#include <iconv.h>
37
#include <stdbool.h>
38
#include <string.h>
39
#include "mutt/lib.h"
40
#include "address/lib.h"
41
#include "config/lib.h"
42
#include "core/lib.h"
43
#include "rfc2047.h"
44
#include "envelope.h"
45
#include "mime.h"
46
47
0
/* Upper bound on the length of a single encoded-word */
#define ENCWORD_LEN_MAX 75
/* Length of the smallest encoded-word skeleton: strlen ("=?.?.?.?=") */
#define ENCWORD_LEN_MIN 9

/* True for a space, a tab, or the terminating NUL */
#define HSPACE(ch) (((ch) == ' ') || ((ch) == '\t') || ((ch) == '\0'))

/* True if the top two bits of the byte are 10 (a UTF-8 continuation byte) */
#define CONTINUATION_BYTE(ch) (0x80 == ((ch) & 0xc0))
53
54
/**
 * @defgroup encoder_api Mime Encoder API
 *
 * Prototype for an encoding function
 *
 * @param res    Buffer for the result
 * @param src    String to encode
 * @param srclen Length of string to encode
 * @param tocode Character encoding
 * @retval num Bytes written to buffer
 */
typedef size_t (*encoder_t)(char *res, const char *src, size_t srclen, const char *tocode);
66
67
/**
 * b_encoder - Base64 Encode a string - Implements ::encoder_t - @ingroup encoder_api
 *
 * Writes "=?", the charset name, "?B?", the Base64 payload and "?=" to res.
 * No terminating NUL is written.
 */
static size_t b_encoder(char *res, const char *src, size_t srclen, const char *tocode)
{
  char *out = res;
  const size_t tocode_len = strlen(tocode);

  /* Leading delimiter, charset and encoding marker */
  memcpy(out, "=?", 2);
  out += 2;
  memcpy(out, tocode, tocode_len);
  out += tocode_len;
  memcpy(out, "?B?", 3);
  out += 3;

  /* Feed the encoder at most three input bytes per round */
  for (size_t chunk = 0; srclen > 0; srclen -= chunk, src += chunk)
  {
    char encoded[11] = { 0 };
    chunk = MIN(3, srclen);

    const size_t produced = mutt_b64_encode(src, chunk, encoded, sizeof(encoded));
    memcpy(out, encoded, produced);
    out += produced;
  }

  /* Trailing delimiter */
  memcpy(out, "?=", 2);
  out += 2;
  return out - res;
}
99
100
/**
101
 * q_encoder - Quoted-printable Encode a string - Implements ::encoder_t - @ingroup encoder_api
102
 */
103
static size_t q_encoder(char *res, const char *src, size_t srclen, const char *tocode)
104
0
{
105
0
  static const char hex[] = "0123456789ABCDEF";
106
0
  char *s0 = res;
107
108
0
  memcpy(res, "=?", 2);
109
0
  res += 2;
110
0
  memcpy(res, tocode, strlen(tocode));
111
0
  res += strlen(tocode);
112
0
  memcpy(res, "?Q?", 3);
113
0
  res += 3;
114
0
  while (srclen--)
115
0
  {
116
0
    unsigned char c = *src++;
117
0
    if (c == ' ')
118
0
    {
119
0
      *res++ = '_';
120
0
    }
121
0
    else if ((c >= 0x7f) || (c < 0x20) || (c == '_') || strchr(MimeSpecials, c))
122
0
    {
123
0
      *res++ = '=';
124
0
      *res++ = hex[(c & 0xf0) >> 4];
125
0
      *res++ = hex[c & 0x0f];
126
0
    }
127
0
    else
128
0
    {
129
0
      *res++ = c;
130
0
    }
131
0
  }
132
0
  memcpy(res, "?=", 2);
133
0
  res += 2;
134
0
  return res - s0;
135
0
}
136
137
/**
138
 * parse_encoded_word - Parse a string and report RFC2047 elements
139
 * @param[in]  str        String to parse
140
 * @param[out] enc        Content encoding found in the first RFC2047 word
141
 * @param[out] charset    Charset found in the first RFC2047 word
142
 * @param[out] charsetlen Length of the charset string found
143
 * @param[out] text       Start of the first RFC2047 encoded text
144
 * @param[out] textlen    Length of the encoded text found
145
 * @retval ptr Start of the RFC2047 encoded word
146
 * @retval NULL None was found
147
 */
148
static char *parse_encoded_word(char *str, enum ContentEncoding *enc, char **charset,
149
                                size_t *charsetlen, char **text, size_t *textlen)
150
1.48M
{
151
1.48M
  regmatch_t *match = mutt_prex_capture(PREX_RFC2047_ENCODED_WORD, str);
152
1.48M
  if (!match)
153
1.48M
    return NULL;
154
155
0
  const regmatch_t *mfull = &match[PREX_RFC2047_ENCODED_WORD_MATCH_FULL];
156
0
  const regmatch_t *mcharset = &match[PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET];
157
0
  const regmatch_t *mencoding = &match[PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING];
158
0
  const regmatch_t *mtext = &match[PREX_RFC2047_ENCODED_WORD_MATCH_TEXT];
159
160
  /* Charset */
161
0
  *charset = str + mutt_regmatch_start(mcharset);
162
0
  *charsetlen = mutt_regmatch_len(mcharset);
163
164
  /* Encoding: either Q or B */
165
0
  *enc = (tolower(str[mutt_regmatch_start(mencoding)]) == 'q') ? ENC_QUOTED_PRINTABLE : ENC_BASE64;
166
167
0
  *text = str + mutt_regmatch_start(mtext);
168
0
  *textlen = mutt_regmatch_len(mtext);
169
0
  return str + mutt_regmatch_start(mfull);
170
1.48M
}
171
172
/**
173
 * try_block - Attempt to convert a block of text
174
 * @param d        String to convert
175
 * @param dlen     Length of string
176
 * @param fromcode Original encoding
177
 * @param tocode   New encoding
178
 * @param encoder  Encoding function
179
 * @param wlen     Number of characters converted
180
 * @retval  0 Success, string converted
181
 * @retval >0 Error, number of bytes that could be converted
182
 *
183
 * If the data could be converted using encoder, then set *encoder and *wlen.
184
 * Otherwise return an upper bound on the maximum length of the data which
185
 * could be converted.
186
 *
187
 * The data is converted from fromcode (which must be stateless) to tocode,
188
 * unless fromcode is NULL, in which case the data is assumed to be already in
189
 * tocode, which should be 8-bit and stateless.
190
 */
191
static size_t try_block(const char *d, size_t dlen, const char *fromcode,
192
                        const char *tocode, encoder_t *encoder, size_t *wlen)
193
0
{
194
0
  char buf[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
195
0
  const char *ib = NULL;
196
0
  char *ob = NULL;
197
0
  size_t ibl, obl;
198
0
  int count, len, len_b, len_q;
199
200
0
  if (fromcode)
201
0
  {
202
0
    iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
203
0
    ASSERT(iconv_t_valid(cd));
204
0
    ib = d;
205
0
    ibl = dlen;
206
0
    ob = buf;
207
0
    obl = sizeof(buf) - strlen(tocode);
208
0
    if ((iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl) == ICONV_ILLEGAL_SEQ) ||
209
0
        (iconv(cd, NULL, NULL, &ob, &obl) == ICONV_ILLEGAL_SEQ))
210
0
    {
211
0
      ASSERT(errno == E2BIG);
212
0
      ASSERT(ib > d);
213
0
      return ((ib - d) == dlen) ? dlen : ib - d + 1;
214
0
    }
215
0
  }
216
0
  else
217
0
  {
218
0
    if (dlen > (sizeof(buf) - strlen(tocode)))
219
0
      return sizeof(buf) - strlen(tocode) + 1;
220
0
    memcpy(buf, d, dlen);
221
0
    ob = buf + dlen;
222
0
  }
223
224
0
  count = 0;
225
0
  for (char *p = buf; p < ob; p++)
226
0
  {
227
0
    unsigned char c = *p;
228
0
    ASSERT(strchr(MimeSpecials, '?'));
229
0
    if ((c >= 0x7f) || (c < 0x20) || (*p == '_') ||
230
0
        ((c != ' ') && strchr(MimeSpecials, *p)))
231
0
    {
232
0
      count++;
233
0
    }
234
0
  }
235
236
0
  len = ENCWORD_LEN_MIN - 2 + strlen(tocode);
237
0
  len_b = len + (((ob - buf) + 2) / 3) * 4;
238
0
  len_q = len + (ob - buf) + 2 * count;
239
240
  /* Apparently RFC1468 says to use B encoding for iso-2022-jp. */
241
0
  if (mutt_istr_equal(tocode, "ISO-2022-JP"))
242
0
    len_q = ENCWORD_LEN_MAX + 1;
243
244
0
  if ((len_b < len_q) && (len_b <= ENCWORD_LEN_MAX))
245
0
  {
246
0
    *encoder = b_encoder;
247
0
    *wlen = len_b;
248
0
    return 0;
249
0
  }
250
0
  else if (len_q <= ENCWORD_LEN_MAX)
251
0
  {
252
0
    *encoder = q_encoder;
253
0
    *wlen = len_q;
254
0
    return 0;
255
0
  }
256
0
  else
257
0
  {
258
0
    return dlen;
259
0
  }
260
0
}
261
262
/**
263
 * encode_block - Encode a block of text using an encoder
264
 * @param str      String to convert
265
 * @param buf      Buffer for result
266
 * @param buflen   Buffer length
267
 * @param fromcode Original encoding
268
 * @param tocode   New encoding
269
 * @param encoder  Encoding function
270
 * @retval num Length of the encoded word
271
 *
272
 * Encode the data (buf, buflen) into str using the encoder.
273
 */
274
static size_t encode_block(char *str, char *buf, size_t buflen, const char *fromcode,
275
                           const char *tocode, encoder_t encoder)
276
0
{
277
0
  if (!fromcode)
278
0
  {
279
0
    return (*encoder)(str, buf, buflen, tocode);
280
0
  }
281
282
0
  const iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
283
0
  ASSERT(iconv_t_valid(cd));
284
0
  const char *ib = buf;
285
0
  size_t ibl = buflen;
286
0
  char tmp[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
287
0
  char *ob = tmp;
288
0
  size_t obl = sizeof(tmp) - strlen(tocode);
289
0
  const size_t n1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl);
290
0
  const size_t n2 = iconv(cd, NULL, NULL, &ob, &obl);
291
0
  ASSERT((n1 != ICONV_ILLEGAL_SEQ) && (n2 != ICONV_ILLEGAL_SEQ));
292
0
  return (*encoder)(str, tmp, ob - tmp, tocode);
293
0
}
294
295
/**
296
 * choose_block - Calculate how much data can be converted
297
 * @param d        String to convert
298
 * @param dlen     Length of string
299
 * @param col      Starting column to convert
300
 * @param fromcode Original encoding
301
 * @param tocode   New encoding
302
 * @param encoder  Encoding function
303
 * @param wlen     Number of characters converted
304
 * @retval num Bytes that can be converted
305
 *
306
 * Discover how much of the data (d, dlen) can be converted into a single
307
 * encoded word. Return how much data can be converted, and set the length
308
 * *wlen of the encoded word and *encoder.  We start in column col, which
309
 * limits the length of the word.
310
 */
311
static size_t choose_block(char *d, size_t dlen, int col, const char *fromcode,
312
                           const char *tocode, encoder_t *encoder, size_t *wlen)
313
0
{
314
0
  const bool utf8 = fromcode && mutt_istr_equal(fromcode, "utf-8");
315
316
0
  size_t n = dlen;
317
0
  while (true)
318
0
  {
319
0
    ASSERT(n > 0);
320
0
    const size_t nn = try_block(d, n, fromcode, tocode, encoder, wlen);
321
0
    if ((nn == 0) && (((col + *wlen) <= (ENCWORD_LEN_MAX + 1)) || (n <= 1)))
322
0
      break;
323
0
    n = ((nn != 0) ? nn : n) - 1;
324
0
    ASSERT(n > 0);
325
0
    if (utf8)
326
0
      while ((n > 1) && CONTINUATION_BYTE(d[n]))
327
0
        n--;
328
0
  }
329
0
  return n;
330
0
}
331
332
/**
333
 * finalize_chunk - Perform charset conversion and filtering
334
 * @param[out] res        Buffer where the resulting string is appended
335
 * @param[in]  buf        Buffer with the input string
336
 * @param[in]  charset    Charset to use for the conversion
337
 * @param[in]  charsetlen Length of the charset parameter
338
 *
339
 * The buffer buf is reinitialized at the end of this function.
340
 */
341
static void finalize_chunk(struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
342
0
{
343
0
  if (!charset)
344
0
    return;
345
0
  char end = charset[charsetlen];
346
0
  charset[charsetlen] = '\0';
347
0
  mutt_ch_convert_string(&buf->data, charset, cc_charset(), MUTT_ICONV_HOOK_FROM);
348
0
  charset[charsetlen] = end;
349
0
  mutt_mb_filter_unprintable(&buf->data);
350
0
  buf_addstr(res, buf->data);
351
0
  FREE(&buf->data);
352
0
  buf_init(buf);
353
0
}
354
355
/**
356
 * decode_word - Decode an RFC2047-encoded string
357
 * @param s   String to decode
358
 * @param len Length of the string
359
 * @param enc Encoding type
360
 * @retval ptr Decoded string
361
 *
362
 * @note The input string must be NUL-terminated; the len parameter is
363
 *       an optimization. The caller must free the returned string.
364
 */
365
static char *decode_word(const char *s, size_t len, enum ContentEncoding enc)
366
0
{
367
0
  const char *it = s;
368
0
  const char *end = s + len;
369
370
0
  ASSERT(*end == '\0');
371
372
0
  if (enc == ENC_QUOTED_PRINTABLE)
373
0
  {
374
0
    struct Buffer *buf = buf_pool_get();
375
0
    for (; it < end; it++)
376
0
    {
377
0
      if (*it == '_')
378
0
      {
379
0
        buf_addch(buf, ' ');
380
0
      }
381
0
      else if ((it[0] == '=') && (!(it[1] & ~127) && (hexval(it[1]) != -1)) &&
382
0
               (!(it[2] & ~127) && (hexval(it[2]) != -1)))
383
0
      {
384
0
        buf_addch(buf, (hexval(it[1]) << 4) | hexval(it[2]));
385
0
        it += 2;
386
0
      }
387
0
      else
388
0
      {
389
0
        buf_addch(buf, *it);
390
0
      }
391
0
    }
392
0
    char *str = buf_strdup(buf);
393
0
    buf_pool_release(&buf);
394
0
    return str;
395
0
  }
396
0
  else if (enc == ENC_BASE64)
397
0
  {
398
0
    const int olen = 3 * len / 4 + 1;
399
0
    char *out = MUTT_MEM_MALLOC(olen, char);
400
0
    int dlen = mutt_b64_decode(it, out, olen);
401
0
    if (dlen == -1)
402
0
    {
403
0
      FREE(&out);
404
0
      return NULL;
405
0
    }
406
0
    out[dlen] = '\0';
407
0
    return out;
408
0
  }
409
410
0
  ASSERT(0); /* The enc parameter has an invalid value */
411
0
  return NULL;
412
0
}
413
414
/**
415
 * encode - RFC2047-encode a string
416
 * @param[in]  d        String to convert
417
 * @param[in]  dlen     Length of string
418
 * @param[in]  col      Starting column to convert
419
 * @param[in]  fromcode Original encoding
420
 * @param[in]  charsets List of allowable encodings (colon separated)
421
 * @param[out] e        Encoded string
422
 * @param[out] elen     Length of encoded string
423
 * @param[in]  specials Special characters to be encoded
424
 * @retval 0 Success
425
 */
426
static int encode(const char *d, size_t dlen, int col, const char *fromcode,
427
                  const struct Slist *charsets, char **e, size_t *elen, const char *specials)
428
0
{
429
0
  int rc = 0;
430
0
  char *buf = NULL;
431
0
  size_t bufpos, buflen;
432
0
  char *t0 = NULL, *t1 = NULL, *t = NULL;
433
0
  char *s0 = NULL, *s1 = NULL;
434
0
  size_t ulen, r, wlen = 0;
435
0
  encoder_t encoder = NULL;
436
0
  char *tocode1 = NULL;
437
0
  const char *tocode = NULL;
438
0
  const char *icode = "utf-8";
439
440
  /* Try to convert to UTF-8. */
441
0
  char *u = mutt_strn_dup(d, dlen);
442
0
  if (mutt_ch_convert_string(&u, fromcode, icode, MUTT_ICONV_NO_FLAGS) != 0)
443
0
  {
444
0
    rc = 1;
445
0
    icode = 0;
446
0
  }
447
0
  ulen = mutt_str_len(u);
448
449
  /* Find earliest and latest things we must encode. */
450
0
  s0 = 0;
451
0
  s1 = 0;
452
0
  t0 = 0;
453
0
  t1 = 0;
454
0
  for (t = u; t < (u + ulen); t++)
455
0
  {
456
0
    if ((*t & 0x80) || ((*t == '=') && (t[1] == '?') && ((t == u) || HSPACE(*(t - 1)))))
457
0
    {
458
0
      if (!t0)
459
0
        t0 = t;
460
0
      t1 = t;
461
0
    }
462
0
    else if (specials && *t && strchr(specials, *t))
463
0
    {
464
0
      if (!s0)
465
0
        s0 = t;
466
0
      s1 = t;
467
0
    }
468
0
  }
469
470
  /* If we have something to encode, include RFC822 specials */
471
0
  if (t0 && s0 && (s0 < t0))
472
0
    t0 = s0;
473
0
  if (t1 && s1 && (s1 > t1))
474
0
    t1 = s1;
475
476
0
  if (!t0)
477
0
  {
478
    /* No encoding is required. */
479
0
    *e = u;
480
0
    *elen = ulen;
481
0
    return rc;
482
0
  }
483
484
  /* Choose target charset. */
485
0
  tocode = fromcode;
486
0
  if (icode)
487
0
  {
488
0
    tocode1 = mutt_ch_choose(icode, charsets, u, ulen, 0, 0);
489
0
    if (tocode1)
490
0
    {
491
0
      tocode = tocode1;
492
0
    }
493
0
    else
494
0
    {
495
0
      rc = 2;
496
0
      icode = 0;
497
0
    }
498
0
  }
499
500
  /* Hack to avoid labelling 8-bit data as us-ascii. */
501
0
  if (!icode && mutt_ch_is_us_ascii(tocode))
502
0
    tocode = "unknown-8bit";
503
504
  /* Adjust t0 for maximum length of line. */
505
0
  t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
506
0
  if (t < u)
507
0
    t = u;
508
0
  if (t < t0)
509
0
    t0 = t;
510
511
  /* Adjust t0 until we can encode a character after a space. */
512
0
  for (; t0 > u; t0--)
513
0
  {
514
0
    if (!HSPACE(*(t0 - 1)))
515
0
      continue;
516
0
    t = t0 + 1;
517
0
    if (icode)
518
0
      while ((t < (u + ulen)) && CONTINUATION_BYTE(*t))
519
0
        t++;
520
0
    if ((try_block(t0, t - t0, icode, tocode, &encoder, &wlen) == 0) &&
521
0
        ((col + (t0 - u) + wlen) <= (ENCWORD_LEN_MAX + 1)))
522
0
    {
523
0
      break;
524
0
    }
525
0
  }
526
527
  /* Adjust t1 until we can encode a character before a space. */
528
0
  for (; t1 < (u + ulen); t1++)
529
0
  {
530
0
    if (!HSPACE(*t1))
531
0
      continue;
532
0
    t = t1 - 1;
533
0
    if (icode)
534
0
      while (CONTINUATION_BYTE(*t))
535
0
        t--;
536
0
    if ((try_block(t, t1 - t, icode, tocode, &encoder, &wlen) == 0) &&
537
0
        ((1 + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1)))
538
0
    {
539
0
      break;
540
0
    }
541
0
  }
542
543
  /* We shall encode the region [t0,t1). */
544
545
  /* Initialise the output buffer with the us-ascii prefix. */
546
0
  buflen = 2 * ulen;
547
0
  buf = MUTT_MEM_MALLOC(buflen, char);
548
0
  bufpos = t0 - u;
549
0
  memcpy(buf, u, t0 - u);
550
551
0
  col += t0 - u;
552
553
0
  t = t0;
554
0
  while (true)
555
0
  {
556
    /* Find how much we can encode. */
557
0
    size_t n = choose_block(t, t1 - t, col, icode, tocode, &encoder, &wlen);
558
0
    if (n == (t1 - t))
559
0
    {
560
      /* See if we can fit the us-ascii suffix, too. */
561
0
      if ((col + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1))
562
0
        break;
563
0
      n = t1 - t - 1;
564
0
      if (icode)
565
0
        while (CONTINUATION_BYTE(t[n]))
566
0
          n--;
567
0
      if (n == 0)
568
0
      {
569
        /* This should only happen in the really stupid case where the
570
         * only word that needs encoding is one character long, but
571
         * there is too much us-ascii stuff after it to use a single
572
         * encoded word. We add the next word to the encoded region
573
         * and try again. */
574
0
        ASSERT(t1 < (u + ulen));
575
0
        for (t1++; (t1 < (u + ulen)) && !HSPACE(*t1); t1++)
576
0
          ; // do nothing
577
578
0
        continue;
579
0
      }
580
0
      n = choose_block(t, n, col, icode, tocode, &encoder, &wlen);
581
0
    }
582
583
    /* Add to output buffer. */
584
0
    const char *line_break = "\n\t";
585
0
    const int lb_len = 2; /* strlen(line_break) */
586
587
0
    if ((bufpos + wlen + lb_len) > buflen)
588
0
    {
589
0
      buflen = bufpos + wlen + lb_len;
590
0
      MUTT_MEM_REALLOC(&buf, buflen, char);
591
0
    }
592
0
    r = encode_block(buf + bufpos, t, n, icode, tocode, encoder);
593
0
    ASSERT(r == wlen);
594
0
    bufpos += wlen;
595
0
    memcpy(buf + bufpos, line_break, lb_len);
596
0
    bufpos += lb_len;
597
598
0
    col = 1;
599
600
0
    t += n;
601
0
  }
602
603
  /* Add last encoded word and us-ascii suffix to buffer. */
604
0
  buflen = bufpos + wlen + (u + ulen - t1);
605
0
  MUTT_MEM_REALLOC(&buf, buflen + 1, char);
606
0
  r = encode_block(buf + bufpos, t, t1 - t, icode, tocode, encoder);
607
0
  ASSERT(r == wlen);
608
0
  bufpos += wlen;
609
0
  memcpy(buf + bufpos, t1, u + ulen - t1);
610
611
0
  FREE(&tocode1);
612
0
  FREE(&u);
613
614
0
  buf[buflen] = '\0';
615
616
0
  *e = buf;
617
0
  *elen = buflen + 1;
618
0
  return rc;
619
0
}
620
621
/**
622
 * rfc2047_encode - RFC-2047-encode a string
623
 * @param[in,out] pd       String to be encoded, and resulting encoded string
624
 * @param[in]     specials Special characters to be encoded
625
 * @param[in]     col      Starting index in string
626
 * @param[in]     charsets List of charsets to choose from
627
 */
628
void rfc2047_encode(char **pd, const char *specials, int col, const struct Slist *charsets)
629
0
{
630
0
  if (!pd || !*pd)
631
0
    return;
632
633
0
  const char *const c_charset = cc_charset();
634
0
  if (!c_charset)
635
0
    return;
636
637
0
  struct Slist *fallback = NULL;
638
0
  if (!charsets)
639
0
  {
640
0
    fallback = slist_parse("utf-8", D_SLIST_SEP_COLON);
641
0
    charsets = fallback;
642
0
  }
643
644
0
  char *e = NULL;
645
0
  size_t elen = 0;
646
0
  encode(*pd, strlen(*pd), col, c_charset, charsets, &e, &elen, specials);
647
648
0
  slist_free(&fallback);
649
0
  FREE(pd);
650
0
  *pd = e;
651
0
}
652
653
/**
654
 * rfc2047_decode - Decode any RFC2047-encoded header fields
655
 * @param[in,out] pd  String to be decoded, and resulting decoded string
656
 *
657
 * Try to decode anything that looks like a valid RFC2047 encoded header field,
658
 * ignoring RFC822 parsing rules. If decoding fails, for example due to an
659
 * invalid base64 string, the original input is left untouched.
660
 */
661
void rfc2047_decode(char **pd)
662
1.73M
{
663
1.73M
  if (!pd || !*pd)
664
243k
    return;
665
666
1.48M
  struct Buffer *buf = buf_pool_get();  // Output buffer
667
1.48M
  char *s = *pd;                        // Read pointer
668
1.48M
  char *beg = NULL;                     // Begin of encoded word
669
1.48M
  enum ContentEncoding enc = ENC_OTHER; // ENC_BASE64 or ENC_QUOTED_PRINTABLE
670
1.48M
  char *charset = NULL;                 // Which charset
671
1.48M
  size_t charsetlen;                    // Length of the charset
672
1.48M
  char *text = NULL;                    // Encoded text
673
1.48M
  size_t textlen = 0;                   // Length of encoded text
674
675
  /* Keep some state in case the next decoded word is using the same charset
676
   * and it happens to be split in the middle of a multibyte character.
677
   * See https://github.com/neomutt/neomutt/issues/1015 */
678
1.48M
  struct Buffer *prev = buf_pool_get(); /* Previously decoded word  */
679
1.48M
  char *prev_charset = NULL;  /* Previously used charset                */
680
1.48M
  size_t prev_charsetlen = 0; /* Length of the previously used charset  */
681
682
1.48M
  const struct Slist *c_assumed_charset = cc_assumed_charset();
683
1.48M
  const char *c_charset = cc_charset();
684
2.97M
  while (*s)
685
1.48M
  {
686
1.48M
    beg = parse_encoded_word(s, &enc, &charset, &charsetlen, &text, &textlen);
687
1.48M
    if (beg != s)
688
1.48M
    {
689
      /* Some non-encoded text was found */
690
1.48M
      size_t holelen = beg ? beg - s : mutt_str_len(s);
691
692
      /* Ignore whitespace between encoded words */
693
1.48M
      if (beg && (mutt_str_lws_len(s, holelen) == holelen))
694
0
      {
695
0
        s = beg;
696
0
        continue;
697
0
      }
698
699
      /* If we have some previously decoded text, add it now */
700
1.48M
      if (!buf_is_empty(prev))
701
0
      {
702
0
        finalize_chunk(buf, prev, prev_charset, prev_charsetlen);
703
0
      }
704
705
      /* Add non-encoded part */
706
1.48M
      if (slist_is_empty(c_assumed_charset))
707
1.48M
      {
708
1.48M
        buf_addstr_n(buf, s, holelen);
709
1.48M
      }
710
0
      else
711
0
      {
712
0
        char *conv = mutt_strn_dup(s, holelen);
713
0
        mutt_ch_convert_nonmime_string(c_assumed_charset, c_charset, &conv);
714
0
        buf_addstr(buf, conv);
715
0
        FREE(&conv);
716
0
      }
717
1.48M
      s += holelen;
718
1.48M
    }
719
1.48M
    if (beg)
720
0
    {
721
      /* Some encoded text was found */
722
0
      text[textlen] = '\0';
723
0
      char *decoded = decode_word(text, textlen, enc);
724
0
      if (!decoded)
725
0
      {
726
0
        goto done;
727
0
      }
728
0
      if (!buf_is_empty(prev) && ((prev_charsetlen != charsetlen) ||
729
0
                                  !mutt_strn_equal(prev_charset, charset, charsetlen)))
730
0
      {
731
        /* Different charset, convert the previous chunk and add it to the
732
         * final result */
733
0
        finalize_chunk(buf, prev, prev_charset, prev_charsetlen);
734
0
      }
735
736
0
      buf_addstr(prev, decoded);
737
0
      FREE(&decoded);
738
0
      prev_charset = charset;
739
0
      prev_charsetlen = charsetlen;
740
0
      s = text + textlen + 2; /* Skip final ?= */
741
0
    }
742
1.48M
  }
743
744
  /* Save the last chunk */
745
1.48M
  if (!buf_is_empty(prev))
746
0
  {
747
0
    finalize_chunk(buf, prev, prev_charset, prev_charsetlen);
748
0
  }
749
750
1.48M
  FREE(pd);
751
1.48M
  *pd = buf_strdup(buf);
752
753
1.48M
done:
754
1.48M
  buf_pool_release(&buf);
755
1.48M
  buf_pool_release(&prev);
756
1.48M
}
757
758
/**
759
 * rfc2047_encode_addrlist - Encode any RFC2047 headers, where required, in an Address list
760
 * @param al   AddressList
761
 * @param tag  Header tag (used for wrapping calculation)
762
 *
763
 * @note rfc2047_encode() may realloc the data pointer it's given,
764
 *       so work on a copy to avoid breaking the Buffer
765
 */
766
void rfc2047_encode_addrlist(struct AddressList *al, const char *tag)
767
0
{
768
0
  if (!al)
769
0
    return;
770
771
0
  int col = tag ? strlen(tag) + 2 : 32;
772
0
  struct Address *a = NULL;
773
0
  char *data = NULL;
774
0
  const struct Slist *const c_send_charset = cs_subset_slist(NeoMutt->sub, "send_charset");
775
0
  TAILQ_FOREACH(a, al, entries)
776
0
  {
777
0
    if (a->personal)
778
0
    {
779
0
      data = buf_strdup(a->personal);
780
0
      rfc2047_encode(&data, AddressSpecials, col, c_send_charset);
781
0
      buf_strcpy(a->personal, data);
782
0
      FREE(&data);
783
0
    }
784
0
    else if (a->group && a->mailbox)
785
0
    {
786
0
      data = buf_strdup(a->mailbox);
787
0
      rfc2047_encode(&data, AddressSpecials, col, c_send_charset);
788
0
      buf_strcpy(a->mailbox, data);
789
0
      FREE(&data);
790
0
    }
791
0
  }
792
0
}
793
794
/**
795
 * rfc2047_decode_addrlist - Decode any RFC2047 headers in an Address list
796
 * @param al AddressList
797
 *
798
 * @note rfc2047_decode() may realloc the data pointer it's given,
799
 *       so work on a copy to avoid breaking the Buffer
800
 */
801
void rfc2047_decode_addrlist(struct AddressList *al)
802
980k
{
803
980k
  if (!al)
804
0
    return;
805
806
980k
  const bool assumed = !slist_is_empty(cc_assumed_charset());
807
980k
  struct Address *a = NULL;
808
980k
  char *data = NULL;
809
980k
  TAILQ_FOREACH(a, al, entries)
810
74.2k
  {
811
74.2k
    if (a->personal && ((buf_find_string(a->personal, "=?")) || assumed))
812
201
    {
813
201
      data = buf_strdup(a->personal);
814
201
      rfc2047_decode(&data);
815
201
      buf_strcpy(a->personal, data);
816
201
      FREE(&data);
817
201
    }
818
74.0k
    else if (a->group && a->mailbox && buf_find_string(a->mailbox, "=?"))
819
3.42k
    {
820
3.42k
      data = buf_strdup(a->mailbox);
821
3.42k
      rfc2047_decode(&data);
822
3.42k
      buf_strcpy(a->mailbox, data);
823
3.42k
      FREE(&data);
824
3.42k
    }
825
74.2k
  }
826
980k
}
827
828
/**
829
 * rfc2047_decode_envelope - Decode the fields of an Envelope
830
 * @param env Envelope
831
 */
832
void rfc2047_decode_envelope(struct Envelope *env)
833
122k
{
834
122k
  if (!env)
835
0
    return;
836
122k
  rfc2047_decode_addrlist(&env->from);
837
122k
  rfc2047_decode_addrlist(&env->to);
838
122k
  rfc2047_decode_addrlist(&env->cc);
839
122k
  rfc2047_decode_addrlist(&env->bcc);
840
122k
  rfc2047_decode_addrlist(&env->reply_to);
841
122k
  rfc2047_decode_addrlist(&env->mail_followup_to);
842
122k
  rfc2047_decode_addrlist(&env->return_path);
843
122k
  rfc2047_decode_addrlist(&env->sender);
844
122k
  rfc2047_decode(&env->x_label);
845
846
122k
  char *subj = env->subject;
847
122k
  *(char **) &env->subject = NULL;
848
122k
  rfc2047_decode(&subj);
849
122k
  mutt_env_set_subject(env, subj);
850
122k
  FREE(&subj);
851
122k
}
852
853
/**
854
 * rfc2047_encode_envelope - Encode the fields of an Envelope
855
 * @param env Envelope
856
 */
857
void rfc2047_encode_envelope(struct Envelope *env)
858
0
{
859
0
  if (!env)
860
0
    return;
861
0
  rfc2047_encode_addrlist(&env->from, "From");
862
0
  rfc2047_encode_addrlist(&env->to, "To");
863
0
  rfc2047_encode_addrlist(&env->cc, "Cc");
864
0
  rfc2047_encode_addrlist(&env->bcc, "Bcc");
865
0
  rfc2047_encode_addrlist(&env->reply_to, "Reply-To");
866
0
  rfc2047_encode_addrlist(&env->mail_followup_to, "Mail-Followup-To");
867
0
  rfc2047_encode_addrlist(&env->sender, "Sender");
868
0
  const struct Slist *const c_send_charset = cs_subset_slist(NeoMutt->sub, "send_charset");
869
0
  rfc2047_encode(&env->x_label, NULL, sizeof("X-Label:"), c_send_charset);
870
871
0
  char *subj = env->subject;
872
0
  *(char **) &env->subject = NULL;
873
0
  rfc2047_encode(&subj, NULL, sizeof("Subject:"), c_send_charset);
874
0
  mutt_env_set_subject(env, subj);
875
0
  FREE(&subj);
876
0
}