Coverage Report

Created: 2024-02-25 06:12

/src/neomutt/email/parse.c
Line
Count
Source (jump to first uncovered line)
1
/**
2
 * @file
3
 * Miscellaneous email parsing routines
4
 *
5
 * @authors
6
 * Copyright (C) 2016-2023 Richard Russon <rich@flatcap.org>
7
 * Copyright (C) 2017-2023 Pietro Cerutti <gahr@gahr.ch>
8
 * Copyright (C) 2019 Federico Kircheis <federico.kircheis@gmail.com>
9
 * Copyright (C) 2019 Ian Zimmerman <itz@no-use.mooo.com>
10
 * Copyright (C) 2021 Christian Ludwig <ludwig@ma.tum.de>
11
 * Copyright (C) 2022 David Purton <dcpurton@marshwiggle.net>
12
 * Copyright (C) 2023 Steinar H Gunderson <steinar+neomutt@gunderson.no>
13
 *
14
 * @copyright
15
 * This program is free software: you can redistribute it and/or modify it under
16
 * the terms of the GNU General Public License as published by the Free Software
17
 * Foundation, either version 2 of the License, or (at your option) any later
18
 * version.
19
 *
20
 * This program is distributed in the hope that it will be useful, but WITHOUT
21
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
22
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
23
 * details.
24
 *
25
 * You should have received a copy of the GNU General Public License along with
26
 * this program.  If not, see <http://www.gnu.org/licenses/>.
27
 */
28
29
/**
30
 * @page email_parse Email parsing code
31
 *
32
 * Miscellaneous email parsing routines
33
 */
34
35
#include "config.h"
36
#include <ctype.h>
37
#include <errno.h>
38
#include <string.h>
39
#include <time.h>
40
#include "mutt/lib.h"
41
#include "address/lib.h"
42
#include "config/lib.h"
43
#include "core/lib.h"
44
#include "mutt.h"
45
#include "parse.h"
46
#include "body.h"
47
#include "email.h"
48
#include "envelope.h"
49
#include "from.h"
50
#include "globals.h"
51
#include "mime.h"
52
#include "parameter.h"
53
#include "rfc2047.h"
54
#include "rfc2231.h"
55
#include "url.h"
56
#ifdef USE_AUTOCRYPT
57
#include "autocrypt/lib.h"
58
#endif
59
60
/* If the 'Content-Length' is bigger than 1GiB, then it's clearly wrong.
61
 * Cap the value to prevent overflow of Body.length */
62
#define CONTENT_TOO_BIG (1 << 30)
63
64
static void parse_part(FILE *fp, struct Body *b, int *counter);
65
static struct Body *rfc822_parse_message(FILE *fp, struct Body *parent, int *counter);
66
static struct Body *parse_multipart(FILE *fp, const char *boundary,
67
                                    LOFF_T end_off, bool digest, int *counter);
68
69
/**
70
 * mutt_auto_subscribe - Check if user is subscribed to mailing list
71
 * @param mailto URL of mailing list subscribe
72
 */
73
void mutt_auto_subscribe(const char *mailto)
74
0
{
75
0
  if (!mailto)
76
0
    return;
77
78
0
  if (!AutoSubscribeCache)
79
0
    AutoSubscribeCache = mutt_hash_new(200, MUTT_HASH_STRCASECMP | MUTT_HASH_STRDUP_KEYS);
80
81
0
  if (mutt_hash_find(AutoSubscribeCache, mailto))
82
0
    return;
83
84
0
  mutt_hash_insert(AutoSubscribeCache, mailto, AutoSubscribeCache);
85
86
0
  struct Envelope *lpenv = mutt_env_new(); /* parsed envelope from the List-Post mailto: URL */
87
88
0
  if (mutt_parse_mailto(lpenv, NULL, mailto) && !TAILQ_EMPTY(&lpenv->to))
89
0
  {
90
0
    const char *mailbox = buf_string(TAILQ_FIRST(&lpenv->to)->mailbox);
91
0
    if (mailbox && !mutt_regexlist_match(&SubscribedLists, mailbox) &&
92
0
        !mutt_regexlist_match(&UnMailLists, mailbox) &&
93
0
        !mutt_regexlist_match(&UnSubscribedLists, mailbox))
94
0
    {
95
      /* mutt_regexlist_add() detects duplicates, so it is safe to
96
       * try to add here without any checks. */
97
0
      mutt_regexlist_add(&MailLists, mailbox, REG_ICASE, NULL);
98
0
      mutt_regexlist_add(&SubscribedLists, mailbox, REG_ICASE, NULL);
99
0
    }
100
0
  }
101
102
0
  mutt_env_free(&lpenv);
103
0
}
104
105
/**
106
 * parse_parameters - Parse a list of Parameters
107
 * @param pl                 Parameter list for the results
108
 * @param s                  String to parse
109
 * @param allow_value_spaces Allow values with spaces
110
 *
111
 * Autocrypt defines an irregular parameter format that doesn't follow the
112
 * rfc.  It splits keydata across multiple lines without parameter continuations.
113
 * The allow_value_spaces parameter allows parsing those values which
114
 * are split by spaces when unfolded.
115
 */
116
static void parse_parameters(struct ParameterList *pl, const char *s, bool allow_value_spaces)
117
70.4k
{
118
70.4k
  struct Parameter *pnew = NULL;
119
70.4k
  const char *p = NULL;
120
70.4k
  size_t i;
121
122
70.4k
  struct Buffer *buf = buf_pool_get();
123
  /* allow_value_spaces, especially with autocrypt keydata, can result
124
   * in quite large parameter values.  avoid frequent reallocs by
125
   * pre-sizing */
126
70.4k
  if (allow_value_spaces)
127
0
    buf_alloc(buf, mutt_str_len(s));
128
129
70.4k
  mutt_debug(LL_DEBUG2, "'%s'\n", s);
130
131
70.4k
  const bool assumed = !slist_is_empty(cc_assumed_charset());
132
169k
  while (*s)
133
122k
  {
134
122k
    buf_reset(buf);
135
136
122k
    p = strpbrk(s, "=;");
137
122k
    if (!p)
138
15.8k
    {
139
15.8k
      mutt_debug(LL_DEBUG1, "malformed parameter: %s\n", s);
140
15.8k
      goto bail;
141
15.8k
    }
142
143
    /* if we hit a ; now the parameter has no value, just skip it */
144
106k
    if (*p != ';')
145
44.8k
    {
146
44.8k
      i = p - s;
147
      /* remove whitespace from the end of the attribute name */
148
45.4k
      while ((i > 0) && mutt_str_is_email_wsp(s[i - 1]))
149
592
        i--;
150
151
      /* the check for the missing parameter token is here so that we can skip
152
       * over any quoted value that may be present.  */
153
44.8k
      if (i == 0)
154
4.27k
      {
155
4.27k
        mutt_debug(LL_DEBUG1, "missing attribute: %s\n", s);
156
4.27k
        pnew = NULL;
157
4.27k
      }
158
40.5k
      else
159
40.5k
      {
160
40.5k
        pnew = mutt_param_new();
161
40.5k
        pnew->attribute = mutt_strn_dup(s, i);
162
40.5k
      }
163
164
44.8k
      do
165
44.8k
      {
166
44.8k
        s = mutt_str_skip_email_wsp(p + 1); /* skip over the =, or space if we loop */
167
168
44.8k
        if (*s == '"')
169
4.32k
        {
170
4.32k
          bool state_ascii = true;
171
4.32k
          s++;
172
27.4k
          for (; *s; s++)
173
26.2k
          {
174
26.2k
            if (assumed)
175
0
            {
176
              // As iso-2022-* has a character of '"' with non-ascii state, ignore it
177
0
              if (*s == 0x1b)
178
0
              {
179
0
                if ((s[1] == '(') && ((s[2] == 'B') || (s[2] == 'J')))
180
0
                  state_ascii = true;
181
0
                else
182
0
                  state_ascii = false;
183
0
              }
184
0
            }
185
26.2k
            if (state_ascii && (*s == '"'))
186
3.08k
              break;
187
23.1k
            if (*s == '\\')
188
400
            {
189
400
              if (s[1])
190
203
              {
191
203
                s++;
192
                /* Quote the next character */
193
203
                buf_addch(buf, *s);
194
203
              }
195
400
            }
196
22.7k
            else
197
22.7k
            {
198
22.7k
              buf_addch(buf, *s);
199
22.7k
            }
200
23.1k
          }
201
4.32k
          if (*s)
202
3.08k
            s++; /* skip over the " */
203
4.32k
        }
204
40.5k
        else
205
40.5k
        {
206
2.83M
          for (; *s && *s != ' ' && *s != ';'; s++)
207
2.79M
            buf_addch(buf, *s);
208
40.5k
        }
209
210
44.8k
        p = s;
211
44.8k
      } while (allow_value_spaces && (*s == ' '));
212
213
      /* if the attribute token was missing, 'new' will be NULL */
214
44.8k
      if (pnew)
215
40.5k
      {
216
40.5k
        pnew->value = buf_strdup(buf);
217
218
40.5k
        mutt_debug(LL_DEBUG2, "parse_parameter: '%s' = '%s'\n",
219
40.5k
                   pnew->attribute ? pnew->attribute : "", pnew->value ? pnew->value : "");
220
221
        /* Add this parameter to the list */
222
40.5k
        TAILQ_INSERT_HEAD(pl, pnew, entries);
223
40.5k
      }
224
44.8k
    }
225
61.9k
    else
226
61.9k
    {
227
61.9k
      mutt_debug(LL_DEBUG1, "parameter with no value: %s\n", s);
228
61.9k
      s = p;
229
61.9k
    }
230
231
    /* Find the next parameter */
232
106k
    if ((*s != ';') && !(s = strchr(s, ';')))
233
7.29k
      break; /* no more parameters */
234
235
99.4k
    do
236
102k
    {
237
      /* Move past any leading whitespace. the +1 skips over the semicolon */
238
102k
      s = mutt_str_skip_email_wsp(s + 1);
239
102k
    } while (*s == ';'); /* skip empty parameters */
240
99.4k
  }
241
242
70.4k
bail:
243
244
70.4k
  rfc2231_decode_parameters(pl);
245
70.4k
  buf_pool_release(&buf);
246
70.4k
}
247
248
/**
249
 * parse_content_disposition - Parse a content disposition
250
 * @param s String to parse
251
 * @param b Body to save the result
252
 *
253
 * e.g. parse a string "inline" and set #DISP_INLINE.
254
 */
255
static void parse_content_disposition(const char *s, struct Body *b)
256
2.20k
{
257
2.20k
  struct ParameterList pl = TAILQ_HEAD_INITIALIZER(pl);
258
259
2.20k
  if (mutt_istr_startswith(s, "inline"))
260
198
    b->disposition = DISP_INLINE;
261
2.00k
  else if (mutt_istr_startswith(s, "form-data"))
262
196
    b->disposition = DISP_FORM_DATA;
263
1.81k
  else
264
1.81k
    b->disposition = DISP_ATTACH;
265
266
  /* Check to see if a default filename was given */
267
2.20k
  s = strchr(s, ';');
268
2.20k
  if (s)
269
1.52k
  {
270
1.52k
    s = mutt_str_skip_email_wsp(s + 1);
271
1.52k
    parse_parameters(&pl, s, false);
272
1.52k
    s = mutt_param_get(&pl, "filename");
273
1.52k
    if (s)
274
681
      mutt_str_replace(&b->filename, s);
275
1.52k
    s = mutt_param_get(&pl, "name");
276
1.52k
    if (s)
277
320
      mutt_str_replace(&b->form_name, s);
278
1.52k
    mutt_param_free(&pl);
279
1.52k
  }
280
2.20k
}
281
282
/**
 * parse_references - Parse references from an email header
 * @param head List to receive the references
 * @param s    String to parse
 *
 * Message-ids are pulled out of s one at a time with
 * mutt_extract_message_id() and each one is passed to
 * mutt_list_insert_head().  The loop ends when no further id is found.
 */
static void parse_references(struct ListHead *head, const char *s)
{
  if (!head)
    return;

  size_t consumed = 0;
  char *msgid = mutt_extract_message_id(s, &consumed);
  while (msgid)
  {
    mutt_list_insert_head(head, msgid);

    /* continue after the part of the string that was just used */
    s += consumed;
    msgid = mutt_extract_message_id(s, &consumed);
  }
}
298
299
/**
300
 * parse_content_language - Read the content's language
301
 * @param s Language string
302
 * @param b Body of the email
303
 */
304
static void parse_content_language(const char *s, struct Body *b)
305
395
{
306
395
  if (!s || !b)
307
0
    return;
308
309
395
  mutt_debug(LL_DEBUG2, "RFC8255 >> Content-Language set to %s\n", s);
310
395
  mutt_str_replace(&b->language, s);
311
395
}
312
313
/**
314
 * mutt_matches_ignore - Does the string match the ignore list
315
 * @param s String to check
316
 * @retval true String matches
317
 *
318
 * Checks Ignore and UnIgnore using mutt_list_match
319
 */
320
bool mutt_matches_ignore(const char *s)
321
0
{
322
0
  return mutt_list_match(s, &Ignore) && !mutt_list_match(s, &UnIgnore);
323
0
}
324
325
/**
326
 * mutt_check_mime_type - Check a MIME type string
327
 * @param s String to check
328
 * @retval enum ContentType, e.g. #TYPE_TEXT
329
 */
330
enum ContentType mutt_check_mime_type(const char *s)
331
119k
{
332
119k
  if (mutt_istr_equal("text", s))
333
1.05k
    return TYPE_TEXT;
334
118k
  if (mutt_istr_equal("multipart", s))
335
2.03k
    return TYPE_MULTIPART;
336
116k
  if (mutt_istr_equal("x-sun-attachment", s))
337
5.47k
    return TYPE_MULTIPART;
338
110k
  if (mutt_istr_equal("application", s))
339
218
    return TYPE_APPLICATION;
340
110k
  if (mutt_istr_equal("message", s))
341
96.8k
    return TYPE_MESSAGE;
342
13.7k
  if (mutt_istr_equal("image", s))
343
198
    return TYPE_IMAGE;
344
13.5k
  if (mutt_istr_equal("audio", s))
345
196
    return TYPE_AUDIO;
346
13.3k
  if (mutt_istr_equal("video", s))
347
194
    return TYPE_VIDEO;
348
13.1k
  if (mutt_istr_equal("model", s))
349
199
    return TYPE_MODEL;
350
12.9k
  if (mutt_istr_equal("*", s))
351
367
    return TYPE_ANY;
352
12.6k
  if (mutt_istr_equal(".*", s))
353
247
    return TYPE_ANY;
354
355
12.3k
  return TYPE_OTHER;
356
12.6k
}
357
358
/**
 * mutt_extract_message_id - Find a message-id
 * @param[in]  s String to parse
 * @param[out] len Number of bytes of s parsed
 * @retval ptr  Message id found
 * @retval NULL No more message ids
 *
 * The search runs on an RFC2047-decoded copy of s.  The result is a newly
 * allocated string reaching from the last '<' seen up to and including the
 * first '>' that follows it.
 *
 * len is only written when a message-id is found.
 *
 * NOTE(review): the value stored in len is an offset into the decoded copy,
 * not into s itself - confirm that callers which advance s by it cope with
 * input whose length changes during decoding.
 */
char *mutt_extract_message_id(const char *s, size_t *len)
{
  if (!s || (*s == '\0'))
    return NULL;

  char *decoded = mutt_str_dup(s);
  rfc2047_decode(&decoded);

  char *msgid = NULL;
  const char *open = NULL; /* most recent '<', if any */

  for (const char *p = decoded; *p != '\0'; p++)
  {
    if (*p == '<')
    {
      open = p;
    }
    else if ((*p == '>') && open)
    {
      const char *past_close = p + 1;
      if (len)
        *len = past_close - decoded;
      msgid = mutt_strn_dup(open, past_close - open);
      break;
    }
  }

  FREE(&decoded);
  return msgid;
}
395
396
/**
397
 * mutt_check_encoding - Check the encoding type
398
 * @param c String to check
399
 * @retval enum ContentEncoding, e.g. #ENC_QUOTED_PRINTABLE
400
 */
401
int mutt_check_encoding(const char *c)
402
1.76k
{
403
1.76k
  if (mutt_istr_startswith(c, "7bit"))
404
194
    return ENC_7BIT;
405
1.56k
  if (mutt_istr_startswith(c, "8bit"))
406
196
    return ENC_8BIT;
407
1.37k
  if (mutt_istr_startswith(c, "binary"))
408
0
    return ENC_BINARY;
409
1.37k
  if (mutt_istr_startswith(c, "quoted-printable"))
410
204
    return ENC_QUOTED_PRINTABLE;
411
1.16k
  if (mutt_istr_startswith(c, "base64"))
412
196
    return ENC_BASE64;
413
972
  if (mutt_istr_startswith(c, "x-uuencode"))
414
0
    return ENC_UUENCODED;
415
972
  if (mutt_istr_startswith(c, "uuencode"))
416
0
    return ENC_UUENCODED;
417
972
  return ENC_OTHER;
418
972
}
419
420
/**
421
 * mutt_parse_content_type - Parse a content type
422
 * @param s String to parse
423
 * @param b Body to save the result
424
 *
425
 * e.g. parse a string "inline" and set #DISP_INLINE.
426
 */
427
void mutt_parse_content_type(const char *s, struct Body *b)
428
119k
{
429
119k
  if (!s || !b)
430
0
    return;
431
432
119k
  FREE(&b->subtype);
433
119k
  mutt_param_free(&b->parameter);
434
435
  /* First extract any existing parameters */
436
119k
  char *pc = strchr(s, ';');
437
119k
  if (pc)
438
68.8k
  {
439
68.8k
    *pc++ = 0;
440
70.9k
    while (*pc && isspace(*pc))
441
2.06k
      pc++;
442
68.8k
    parse_parameters(&b->parameter, pc, false);
443
444
    /* Some pre-RFC1521 gateways still use the "name=filename" convention,
445
     * but if a filename has already been set in the content-disposition,
446
     * let that take precedence, and don't set it here */
447
68.8k
    pc = mutt_param_get(&b->parameter, "name");
448
68.8k
    if (pc && !b->filename)
449
405
      b->filename = mutt_str_dup(pc);
450
451
    /* this is deep and utter perversion */
452
68.8k
    pc = mutt_param_get(&b->parameter, "conversions");
453
68.8k
    if (pc)
454
276
      b->encoding = mutt_check_encoding(pc);
455
68.8k
  }
456
457
  /* Now get the subtype */
458
119k
  char *subtype = strchr(s, '/');
459
119k
  if (subtype)
460
3.13k
  {
461
3.13k
    *subtype++ = '\0';
462
33.0k
    for (pc = subtype; *pc && !isspace(*pc) && (*pc != ';'); pc++)
463
29.9k
      ; // do nothing
464
465
3.13k
    *pc = '\0';
466
3.13k
    mutt_str_replace(&b->subtype, subtype);
467
3.13k
  }
468
469
  /* Finally, get the major type */
470
119k
  b->type = mutt_check_mime_type(s);
471
472
119k
  if (mutt_istr_equal("x-sun-attachment", s))
473
5.47k
    mutt_str_replace(&b->subtype, "x-sun-attachment");
474
475
119k
  if (b->type == TYPE_OTHER)
476
12.3k
  {
477
12.3k
    mutt_str_replace(&b->xtype, s);
478
12.3k
  }
479
480
119k
  if (!b->subtype)
481
111k
  {
482
    /* Some older non-MIME mailers (i.e., mailtool, elm) have a content-type
483
     * field, so we can attempt to convert the type to Body here.  */
484
111k
    if (b->type == TYPE_TEXT)
485
618
    {
486
618
      b->subtype = mutt_str_dup("plain");
487
618
    }
488
110k
    else if (b->type == TYPE_AUDIO)
489
196
    {
490
196
      b->subtype = mutt_str_dup("basic");
491
196
    }
492
110k
    else if (b->type == TYPE_MESSAGE)
493
95.2k
    {
494
95.2k
      b->subtype = mutt_str_dup("rfc822");
495
95.2k
    }
496
15.4k
    else if (b->type == TYPE_OTHER)
497
12.0k
    {
498
12.0k
      char buf[128] = { 0 };
499
500
12.0k
      b->type = TYPE_APPLICATION;
501
12.0k
      snprintf(buf, sizeof(buf), "x-%s", s);
502
12.0k
      b->subtype = mutt_str_dup(buf);
503
12.0k
    }
504
3.32k
    else
505
3.32k
    {
506
3.32k
      b->subtype = mutt_str_dup("x-unknown");
507
3.32k
    }
508
111k
  }
509
510
  /* Default character set for text types. */
511
119k
  if (b->type == TYPE_TEXT)
512
1.05k
  {
513
1.05k
    pc = mutt_param_get(&b->parameter, "charset");
514
1.05k
    if (pc)
515
427
    {
516
      /* Microsoft Outlook seems to think it is necessary to repeat
517
       * charset=, strip it off not to confuse ourselves */
518
427
      if (mutt_istrn_equal(pc, "charset=", sizeof("charset=") - 1))
519
203
        mutt_param_set(&b->parameter, "charset", pc + (sizeof("charset=") - 1));
520
427
    }
521
628
    else
522
628
    {
523
628
      mutt_param_set(&b->parameter, "charset",
524
628
                     mutt_ch_get_default_charset(cc_assumed_charset()));
525
628
    }
526
1.05k
  }
527
119k
}
528
529
#ifdef USE_AUTOCRYPT
530
/**
531
 * parse_autocrypt - Parse an Autocrypt header line
532
 * @param head Autocrypt header to insert before
533
 * @param s    Header string to parse
534
 * @retval ptr New AutocryptHeader inserted before head
535
 */
536
static struct AutocryptHeader *parse_autocrypt(struct AutocryptHeader *head, const char *s)
537
{
538
  struct AutocryptHeader *autocrypt = mutt_autocrypthdr_new();
539
  autocrypt->next = head;
540
541
  struct ParameterList pl = TAILQ_HEAD_INITIALIZER(pl);
542
  parse_parameters(&pl, s, true);
543
  if (TAILQ_EMPTY(&pl))
544
  {
545
    autocrypt->invalid = true;
546
    goto cleanup;
547
  }
548
549
  struct Parameter *p = NULL;
550
  TAILQ_FOREACH(p, &pl, entries)
551
  {
552
    if (mutt_istr_equal(p->attribute, "addr"))
553
    {
554
      if (autocrypt->addr)
555
      {
556
        autocrypt->invalid = true;
557
        goto cleanup;
558
      }
559
      autocrypt->addr = p->value;
560
      p->value = NULL;
561
    }
562
    else if (mutt_istr_equal(p->attribute, "prefer-encrypt"))
563
    {
564
      if (mutt_istr_equal(p->value, "mutual"))
565
        autocrypt->prefer_encrypt = true;
566
    }
567
    else if (mutt_istr_equal(p->attribute, "keydata"))
568
    {
569
      if (autocrypt->keydata)
570
      {
571
        autocrypt->invalid = true;
572
        goto cleanup;
573
      }
574
      autocrypt->keydata = p->value;
575
      p->value = NULL;
576
    }
577
    else if (p->attribute && (p->attribute[0] != '_'))
578
    {
579
      autocrypt->invalid = true;
580
      goto cleanup;
581
    }
582
  }
583
584
  /* Checking the addr against From, and for multiple valid headers
585
   * occurs later, after all the headers are parsed. */
586
  if (!autocrypt->addr || !autocrypt->keydata)
587
    autocrypt->invalid = true;
588
589
cleanup:
590
  mutt_param_free(&pl);
591
  return autocrypt;
592
}
593
#endif
594
595
/**
596
 * rfc2369_first_mailto - Extract the first mailto: URL from a RFC2369 list
597
 * @param body Body of the header
598
 * @retval ptr First mailto: URL found, or NULL if none was found
599
 */
600
static char *rfc2369_first_mailto(const char *body)
601
1.92k
{
602
2.87k
  for (const char *beg = body, *end = NULL; beg; beg = strchr(end, ','))
603
2.61k
  {
604
2.61k
    beg = strchr(beg, '<');
605
2.61k
    if (!beg)
606
758
    {
607
758
      break;
608
758
    }
609
1.85k
    beg++;
610
1.85k
    end = strchr(beg, '>');
611
1.85k
    if (!end)
612
282
    {
613
282
      break;
614
282
    }
615
616
1.57k
    char *mlist = mutt_strn_dup(beg, end - beg);
617
1.57k
    if (url_check_scheme(mlist) == U_MAILTO)
618
617
    {
619
617
      return mlist;
620
617
    }
621
955
    FREE(&mlist);
622
955
  }
623
1.30k
  return NULL;
624
1.92k
}
625
626
/**
627
 * mutt_rfc822_parse_line - Parse an email header
628
 * @param env       Envelope of the email
629
 * @param e         Email
630
 * @param name      Header field name, e.g. 'to'
631
 * @param name_len  Must be equivalent to strlen(name)
632
 * @param body      Header field body, e.g. 'john@example.com'
633
 * @param user_hdrs If true, save into the Envelope's userhdrs
634
 * @param weed      If true, perform header weeding (filtering)
635
 * @param do_2047   If true, perform RFC2047 decoding of the field
636
 * @retval 1 The field is recognised
637
 * @retval 0 The field is not recognised
638
 *
639
 * Process a line from an email header.  Each line that is recognised is parsed
640
 * and the information put in the Envelope or Header.
641
 */
642
int mutt_rfc822_parse_line(struct Envelope *env, struct Email *e,
643
                           const char *name, size_t name_len, const char *body,
644
                           bool user_hdrs, bool weed, bool do_2047)
645
325k
{
646
325k
  if (!env || !name)
647
0
    return 0;
648
649
325k
  bool matched = false;
650
651
325k
  switch (name[0] | 0x20)
652
325k
  {
653
2.08k
    case 'a':
654
2.08k
      if ((name_len == 13) && eqi12(name + 1, "pparently-to"))
655
250
      {
656
250
        mutt_addrlist_parse(&env->to, body);
657
250
        matched = true;
658
250
      }
659
1.83k
      else if ((name_len == 15) && eqi14(name + 1, "pparently-from"))
660
369
      {
661
369
        mutt_addrlist_parse(&env->from, body);
662
369
        matched = true;
663
369
      }
664
#ifdef USE_AUTOCRYPT
665
      else if ((name_len == 9) && eqi8(name + 1, "utocrypt"))
666
      {
667
        const bool c_autocrypt = cs_subset_bool(NeoMutt->sub, "autocrypt");
668
        if (c_autocrypt)
669
        {
670
          env->autocrypt = parse_autocrypt(env->autocrypt, body);
671
          matched = true;
672
        }
673
      }
674
      else if ((name_len == 16) && eqi15(name + 1, "utocrypt-gossip"))
675
      {
676
        const bool c_autocrypt = cs_subset_bool(NeoMutt->sub, "autocrypt");
677
        if (c_autocrypt)
678
        {
679
          env->autocrypt_gossip = parse_autocrypt(env->autocrypt_gossip, body);
680
          matched = true;
681
        }
682
      }
683
#endif
684
2.08k
      break;
685
686
656
    case 'b':
687
656
      if ((name_len == 3) && eqi2(name + 1, "cc"))
688
200
      {
689
200
        mutt_addrlist_parse(&env->bcc, body);
690
200
        matched = true;
691
200
      }
692
656
      break;
693
694
141k
    case 'c':
695
141k
      if ((name_len == 2) && eqi1(name + 1, "c"))
696
3.30k
      {
697
3.30k
        mutt_addrlist_parse(&env->cc, body);
698
3.30k
        matched = true;
699
3.30k
      }
700
138k
      else
701
138k
      {
702
138k
        if ((name_len >= 12) && eqi8(name, "content-"))
703
111k
        {
704
111k
          if ((name_len == 12) && eqi4(name + 8, "type"))
705
102k
          {
706
102k
            if (e)
707
102k
              mutt_parse_content_type(body, e->body);
708
102k
            matched = true;
709
102k
          }
710
9.01k
          else if ((name_len == 16) && eqi8(name + 8, "language"))
711
197
          {
712
197
            if (e)
713
197
              parse_content_language(body, e->body);
714
197
            matched = true;
715
197
          }
716
8.82k
          else if ((name_len == 25) && eqi17(name + 8, "transfer-encoding"))
717
899
          {
718
899
            if (e)
719
899
              e->body->encoding = mutt_check_encoding(body);
720
899
            matched = true;
721
899
          }
722
7.92k
          else if ((name_len == 14) && eqi8(name + 6, "t-length"))
723
3.62k
          {
724
3.62k
            if (e)
725
3.62k
            {
726
3.62k
              unsigned long len = 0;
727
3.62k
              e->body->length = mutt_str_atoul(body, &len) ? MIN(len, CONTENT_TOO_BIG) : -1;
728
3.62k
            }
729
3.62k
            matched = true;
730
3.62k
          }
731
4.30k
          else if ((name_len == 19) && eqi11(name + 8, "description"))
732
200
          {
733
200
            if (e)
734
200
            {
735
200
              mutt_str_replace(&e->body->description, body);
736
200
              rfc2047_decode(&e->body->description);
737
200
            }
738
200
            matched = true;
739
200
          }
740
4.10k
          else if ((name_len == 19) && eqi11(name + 8, "disposition"))
741
1.82k
          {
742
1.82k
            if (e)
743
1.82k
              parse_content_disposition(body, e->body);
744
1.82k
            matched = true;
745
1.82k
          }
746
111k
        }
747
138k
      }
748
141k
      break;
749
750
25.8k
    case 'd':
751
25.8k
      if ((name_len != 4) || !eqi4(name, "date"))
752
1.24k
        break;
753
754
24.6k
      mutt_str_replace(&env->date, body);
755
24.6k
      if (e)
756
23.8k
      {
757
23.8k
        struct Tz tz = { 0 };
758
23.8k
        e->date_sent = mutt_date_parse_date(body, &tz);
759
23.8k
        if (e->date_sent > 0)
760
3.59k
        {
761
3.59k
          e->zhours = tz.zhours;
762
3.59k
          e->zminutes = tz.zminutes;
763
3.59k
          e->zoccident = tz.zoccident;
764
3.59k
        }
765
23.8k
      }
766
24.6k
      matched = true;
767
24.6k
      break;
768
769
2.34k
    case 'e':
770
2.34k
      if ((name_len == 7) && eqi6(name + 1, "xpires") && e &&
771
2.34k
          (mutt_date_parse_date(body, NULL) < mutt_date_now()))
772
764
      {
773
764
        e->expired = true;
774
764
      }
775
2.34k
      break;
776
777
8.16k
    case 'f':
778
8.16k
      if ((name_len == 4) && eqi4(name, "from"))
779
1.26k
      {
780
1.26k
        mutt_addrlist_parse(&env->from, body);
781
1.26k
        matched = true;
782
1.26k
      }
783
6.90k
      else if ((name_len == 11) && eqi10(name + 1, "ollowup-to"))
784
2.75k
      {
785
2.75k
        if (!env->followup_to)
786
2.48k
        {
787
2.48k
          env->followup_to = mutt_str_dup(mutt_str_skip_whitespace(body));
788
2.48k
          mutt_str_remove_trailing_ws(env->followup_to);
789
2.48k
        }
790
2.75k
        matched = true;
791
2.75k
      }
792
8.16k
      break;
793
794
701
    case 'i':
795
701
      if ((name_len != 11) || !eqi10(name + 1, "n-reply-to"))
796
499
        break;
797
798
202
      mutt_list_free(&env->in_reply_to);
799
202
      parse_references(&env->in_reply_to, body);
800
202
      matched = true;
801
202
      break;
802
803
7.04k
    case 'l':
804
7.04k
      if ((name_len == 5) && eqi4(name + 1, "ines"))
805
2.74k
      {
806
2.74k
        if (e)
807
2.39k
        {
808
2.39k
          unsigned int ui = 0; // we don't want a negative number of lines
809
2.39k
          mutt_str_atoui(body, &ui);
810
2.39k
          e->lines = ui;
811
2.39k
        }
812
813
2.74k
        matched = true;
814
2.74k
      }
815
4.30k
      else if ((name_len == 9) && eqi8(name + 1, "ist-post"))
816
1.33k
      {
817
        /* RFC2369 */
818
1.33k
        if (!mutt_strn_equal(mutt_str_skip_whitespace(body), "NO", 2))
819
1.13k
        {
820
1.13k
          char *mailto = rfc2369_first_mailto(body);
821
1.13k
          if (mailto)
822
222
          {
823
222
            FREE(&env->list_post);
824
222
            env->list_post = mailto;
825
222
            const bool c_auto_subscribe = cs_subset_bool(NeoMutt->sub, "auto_subscribe");
826
222
            if (c_auto_subscribe)
827
0
              mutt_auto_subscribe(env->list_post);
828
222
          }
829
1.13k
        }
830
1.33k
        matched = true;
831
1.33k
      }
832
2.97k
      else if ((name_len == 14) && eqi13(name + 1, "ist-subscribe"))
833
394
      {
834
        /* RFC2369 */
835
394
        char *mailto = rfc2369_first_mailto(body);
836
394
        if (mailto)
837
197
        {
838
197
          FREE(&env->list_subscribe);
839
197
          env->list_subscribe = mailto;
840
197
        }
841
394
        matched = true;
842
394
      }
843
2.57k
      else if ((name_len == 16) && eqi15(name + 1, "ist-unsubscribe"))
844
393
      {
845
        /* RFC2369 */
846
393
        char *mailto = rfc2369_first_mailto(body);
847
393
        if (mailto)
848
198
        {
849
198
          FREE(&env->list_unsubscribe);
850
198
          env->list_unsubscribe = mailto;
851
198
        }
852
393
        matched = true;
853
393
      }
854
7.04k
      break;
855
856
3.47k
    case 'm':
857
3.47k
      if ((name_len == 12) && eqi11(name + 1, "ime-version"))
858
484
      {
859
484
        if (e)
860
290
          e->mime = true;
861
484
        matched = true;
862
484
      }
863
2.99k
      else if ((name_len == 10) && eqi9(name + 1, "essage-id"))
864
441
      {
865
        /* We add a new "Message-ID:" when building a message */
866
441
        FREE(&env->message_id);
867
441
        env->message_id = mutt_extract_message_id(body, NULL);
868
441
        matched = true;
869
441
      }
870
2.55k
      else
871
2.55k
      {
872
2.55k
        if ((name_len >= 13) && eqi4(name + 1, "ail-"))
873
1.53k
        {
874
1.53k
          if ((name_len == 13) && eqi8(name + 5, "reply-to"))
875
571
          {
876
            /* override the Reply-To: field */
877
571
            mutt_addrlist_clear(&env->reply_to);
878
571
            mutt_addrlist_parse(&env->reply_to, body);
879
571
            matched = true;
880
571
          }
881
967
          else if ((name_len == 16) && eqi11(name + 5, "followup-to"))
882
273
          {
883
273
            mutt_addrlist_parse(&env->mail_followup_to, body);
884
273
            matched = true;
885
273
          }
886
1.53k
        }
887
2.55k
      }
888
3.47k
      break;
889
890
1.72k
    case 'n':
891
1.72k
      if ((name_len == 10) && eqi9(name + 1, "ewsgroups"))
892
230
      {
893
230
        FREE(&env->newsgroups);
894
230
        env->newsgroups = mutt_str_dup(mutt_str_skip_whitespace(body));
895
230
        mutt_str_remove_trailing_ws(env->newsgroups);
896
230
        matched = true;
897
230
      }
898
1.72k
      break;
899
900
6.88k
    case 'o':
901
      /* field 'Organization:' saves only for pager! */
902
6.88k
      if ((name_len == 12) && eqi11(name + 1, "rganization"))
903
703
      {
904
703
        if (!env->organization && !mutt_istr_equal(body, "unknown"))
905
272
          env->organization = mutt_str_dup(body);
906
703
      }
907
6.88k
      break;
908
909
18.4k
    case 'r':
910
18.4k
      if ((name_len == 10) && eqi9(name + 1, "eferences"))
911
6.35k
      {
912
6.35k
        mutt_list_free(&env->references);
913
6.35k
        parse_references(&env->references, body);
914
6.35k
        matched = true;
915
6.35k
      }
916
12.1k
      else if ((name_len == 8) && eqi8(name, "reply-to"))
917
241
      {
918
241
        mutt_addrlist_parse(&env->reply_to, body);
919
241
        matched = true;
920
241
      }
921
11.8k
      else if ((name_len == 11) && eqi10(name + 1, "eturn-path"))
922
208
      {
923
208
        mutt_addrlist_parse(&env->return_path, body);
924
208
        matched = true;
925
208
      }
926
11.6k
      else if ((name_len == 8) && eqi8(name, "received"))
927
2.26k
      {
928
2.26k
        if (e && (e->received == 0))
929
1.50k
        {
930
1.50k
          char *d = strrchr(body, ';');
931
1.50k
          if (d)
932
723
          {
933
723
            d = mutt_str_skip_email_wsp(d + 1);
934
723
            e->received = mutt_date_parse_date(d, NULL);
935
723
          }
936
1.50k
        }
937
2.26k
      }
938
18.4k
      break;
939
940
18.2k
    case 's':
941
18.2k
      if ((name_len == 7) && eqi6(name + 1, "ubject"))
942
981
      {
943
981
        if (!env->subject)
944
728
          mutt_env_set_subject(env, body);
945
981
        matched = true;
946
981
      }
947
17.2k
      else if ((name_len == 6) && eqi5(name + 1, "ender"))
948
198
      {
949
198
        mutt_addrlist_parse(&env->sender, body);
950
198
        matched = true;
951
198
      }
952
17.1k
      else if ((name_len == 6) && eqi5(name + 1, "tatus"))
953
865
      {
954
865
        if (e)
955
638
        {
956
3.57k
          while (*body)
957
2.93k
          {
958
2.93k
            switch (*body)
959
2.93k
            {
960
204
              case 'O':
961
204
              {
962
204
                e->old = true;
963
204
                break;
964
0
              }
965
210
              case 'R':
966
210
                e->read = true;
967
210
                break;
968
684
              case 'r':
969
684
                e->replied = true;
970
684
                break;
971
2.93k
            }
972
2.93k
            body++;
973
2.93k
          }
974
638
        }
975
865
        matched = true;
976
865
      }
977
16.2k
      else if (e && (name_len == 10) && eqi1(name + 1, "u") &&
978
16.2k
               (eqi8(name + 2, "persedes") || eqi8(name + 2, "percedes")))
979
433
      {
980
433
        FREE(&env->supersedes);
981
433
        env->supersedes = mutt_str_dup(body);
982
433
      }
983
18.2k
      break;
984
985
18.2k
    case 't':
986
13.3k
      if ((name_len == 2) && eqi1(name + 1, "o"))
987
12.0k
      {
988
12.0k
        mutt_addrlist_parse(&env->to, body);
989
12.0k
        matched = true;
990
12.0k
      }
991
13.3k
      break;
992
993
4.12k
    case 'x':
994
4.12k
      if ((name_len == 8) && eqi8(name, "x-status"))
995
435
      {
996
435
        if (e)
997
236
        {
998
1.41k
          while (*body)
999
1.18k
          {
1000
1.18k
            switch (*body)
1001
1.18k
            {
1002
216
              case 'A':
1003
216
                e->replied = true;
1004
216
                break;
1005
331
              case 'D':
1006
331
                e->deleted = true;
1007
331
                break;
1008
198
              case 'F':
1009
198
                e->flagged = true;
1010
198
                break;
1011
437
              default:
1012
437
                break;
1013
1.18k
            }
1014
1.18k
            body++;
1015
1.18k
          }
1016
236
        }
1017
435
        matched = true;
1018
435
      }
1019
3.69k
      else if ((name_len == 7) && eqi6(name + 1, "-label"))
1020
205
      {
1021
205
        FREE(&env->x_label);
1022
205
        env->x_label = mutt_str_dup(body);
1023
205
        matched = true;
1024
205
      }
1025
3.48k
      else if ((name_len == 12) && eqi11(name + 1, "-comment-to"))
1026
636
      {
1027
636
        if (!env->x_comment_to)
1028
284
          env->x_comment_to = mutt_str_dup(body);
1029
636
        matched = true;
1030
636
      }
1031
2.85k
      else if ((name_len == 4) && eqi4(name, "xref"))
1032
455
      {
1033
455
        if (!env->xref)
1034
230
          env->xref = mutt_str_dup(body);
1035
455
        matched = true;
1036
455
      }
1037
2.39k
      else if ((name_len == 13) && eqi12(name + 1, "-original-to"))
1038
441
      {
1039
441
        mutt_addrlist_parse(&env->x_original_to, body);
1040
441
        matched = true;
1041
441
      }
1042
4.12k
      break;
1043
1044
70.1k
    default:
1045
70.1k
      break;
1046
325k
  }
1047
1048
  /* Keep track of the user-defined headers */
1049
325k
  if (!matched && user_hdrs)
1050
0
  {
1051
0
    const bool c_weed = cs_subset_bool(NeoMutt->sub, "weed");
1052
0
    char *dup = NULL;
1053
0
    mutt_str_asprintf(&dup, "%s: %s", name, body);
1054
1055
0
    if (!weed || !c_weed || !mutt_matches_ignore(dup))
1056
0
    {
1057
0
      struct ListNode *np = mutt_list_insert_tail(&env->userhdrs, dup);
1058
0
      if (do_2047)
1059
0
      {
1060
0
        rfc2047_decode(&np->data);
1061
0
      }
1062
0
    }
1063
0
    else
1064
0
    {
1065
0
      FREE(&dup);
1066
0
    }
1067
0
  }
1068
1069
325k
  return matched;
1070
325k
}
1071
1072
/**
 * mutt_rfc822_read_line - Read a header line from a file
 * @param fp      File to read from
 * @param buf     Buffer to store the result
 * @retval num Number of bytes read from fp
 * @retval 0   fp or buf is NULL, or fgets() failed (EOF or read error)
 *
 * Reads an arbitrarily long header field, and looks ahead for continuation
 * lines.  Trailing whitespace is stripped from every complete line and a
 * folded field is joined using a single space.
 *
 * If the very first line read starts with whitespace (e.g. the blank line
 * that ends a header block), buf is left empty and the length of that line is
 * returned.
 *
 * @note When fgets() fails, 0 is returned even if earlier chunks of the field
 *       have already been appended to buf.
 */
size_t mutt_rfc822_read_line(FILE *fp, struct Buffer *buf)
{
  if (!fp || !buf)
    return 0;

  size_t read = 0;
  char line[1024] = { 0 }; /* RFC2822 specifies a maximum line length of 998 */

  buf_reset(buf);
  while (true)
  {
    if (!fgets(line, sizeof(line), fp))
    {
      return 0;
    }

    const size_t linelen = mutt_str_len(line);
    if (linelen == 0)
    {
      break;
    }

    /* Header bytes are untrusted and may have the high bit set.  The <ctype.h>
     * functions require a value representable as unsigned char (or EOF), so
     * convert before calling isspace(). */
    if (isspace((unsigned char) line[0]) && buf_is_empty(buf))
    {
      /* Not the start of a field: report the line's length, leave buf empty */
      read = linelen;
      break;
    }

    read += linelen;

    size_t off = linelen - 1;
    if (line[off] == '\n')
    {
      /* We did get a full line: remove trailing space */
      do
      {
        line[off] = '\0';
      } while (off && isspace((unsigned char) line[--off]));

      /* check to see if the next line is a continuation line */
      int ch = fgetc(fp);
      if ((ch != ' ') && (ch != '\t'))
      {
        /* next line is a separate header field or EOH */
        ungetc(ch, fp);
        buf_addstr(buf, line);
        break;
      }
      read++;

      /* eat tabs and spaces from the beginning of the continuation line */
      while (((ch = fgetc(fp)) == ' ') || (ch == '\t'))
      {
        read++;
      }

      ungetc(ch, fp);
      line[off + 1] = ' '; /* string is still terminated because we removed
                              at least one whitespace char above */
    }

    /* Either a partial chunk of an over-long line, or a folded line that
     * continues: append it and keep reading */
    buf_addstr(buf, line);
  }

  return read;
}
1147
1148
/**
1149
 * mutt_rfc822_read_header - Parses an RFC822 header
1150
 * @param fp        Stream to read from
1151
 * @param e         Current Email (optional)
1152
 * @param user_hdrs If set, store user headers
1153
 *                  Used for recall-message and postpone modes
1154
 * @param weed      If this parameter is set and the user has activated the
1155
 *                  $weed option, honor the header weed list for user headers.
1156
 *                  Used for recall-message
1157
 * @retval ptr Newly allocated envelope structure
1158
 *
1159
 * Caller should free the Envelope using mutt_env_free().
1160
 */
1161
struct Envelope *mutt_rfc822_read_header(FILE *fp, struct Email *e, bool user_hdrs, bool weed)
1162
109k
{
1163
109k
  if (!fp)
1164
0
    return NULL;
1165
1166
109k
  struct Envelope *env = mutt_env_new();
1167
109k
  char *p = NULL;
1168
109k
  LOFF_T loc = e ? e->offset : ftello(fp);
1169
109k
  if (loc < 0)
1170
0
  {
1171
0
    mutt_debug(LL_DEBUG1, "ftello: %s (errno %d)\n", strerror(errno), errno);
1172
0
    loc = 0;
1173
0
  }
1174
1175
109k
  struct Buffer *line = buf_pool_get();
1176
1177
109k
  if (e)
1178
109k
  {
1179
109k
    if (!e->body)
1180
109k
    {
1181
109k
      e->body = mutt_body_new();
1182
1183
      /* set the defaults from RFC1521 */
1184
109k
      e->body->type = TYPE_TEXT;
1185
109k
      e->body->subtype = mutt_str_dup("plain");
1186
109k
      e->body->encoding = ENC_7BIT;
1187
109k
      e->body->length = -1;
1188
1189
      /* RFC2183 says this is arbitrary */
1190
109k
      e->body->disposition = DISP_INLINE;
1191
109k
    }
1192
109k
  }
1193
1194
109k
  while (true)
1195
469k
  {
1196
469k
    LOFF_T line_start_loc = loc;
1197
469k
    size_t len = mutt_rfc822_read_line(fp, line);
1198
469k
    if (buf_is_empty(line))
1199
100k
    {
1200
100k
      break;
1201
100k
    }
1202
368k
    loc += len;
1203
368k
    const char *lines = buf_string(line);
1204
368k
    p = strpbrk(lines, ": \t");
1205
368k
    if (!p || (*p != ':'))
1206
9.18k
    {
1207
9.18k
      char return_path[1024] = { 0 };
1208
9.18k
      time_t t = 0;
1209
1210
      /* some bogus MTAs will quote the original "From " line */
1211
9.18k
      if (mutt_str_startswith(lines, ">From "))
1212
280
      {
1213
280
        continue; /* just ignore */
1214
280
      }
1215
8.90k
      else if (is_from(lines, return_path, sizeof(return_path), &t))
1216
0
      {
1217
        /* MH sometimes has the From_ line in the middle of the header! */
1218
0
        if (e && (e->received == 0))
1219
0
          e->received = t - mutt_date_local_tz(t);
1220
0
        continue;
1221
0
      }
1222
1223
      /* We need to seek back to the start of the body. Note that we
1224
       * keep track of loc ourselves, since calling ftello() incurs
1225
       * a syscall, which can be expensive to do for every single line */
1226
8.90k
      (void) mutt_file_seek(fp, line_start_loc, SEEK_SET);
1227
8.90k
      break; /* end of header */
1228
9.18k
    }
1229
359k
    size_t name_len = p - lines;
1230
1231
359k
    char buf[1024] = { 0 };
1232
359k
    if (mutt_replacelist_match(&SpamList, buf, sizeof(buf), lines))
1233
0
    {
1234
0
      if (!mutt_regexlist_match(&NoSpamList, lines))
1235
0
      {
1236
        /* if spam tag already exists, figure out how to amend it */
1237
0
        if ((!buf_is_empty(&env->spam)) && (*buf != '\0'))
1238
0
        {
1239
          /* If `$spam_separator` defined, append with separator */
1240
0
          const char *const c_spam_separator = cs_subset_string(NeoMutt->sub, "spam_separator");
1241
0
          if (c_spam_separator)
1242
0
          {
1243
0
            buf_addstr(&env->spam, c_spam_separator);
1244
0
            buf_addstr(&env->spam, buf);
1245
0
          }
1246
0
          else /* overwrite */
1247
0
          {
1248
0
            buf_reset(&env->spam);
1249
0
            buf_addstr(&env->spam, buf);
1250
0
          }
1251
0
        }
1252
0
        else if (buf_is_empty(&env->spam) && (*buf != '\0'))
1253
0
        {
1254
          /* spam tag is new, and match expr is non-empty; copy */
1255
0
          buf_addstr(&env->spam, buf);
1256
0
        }
1257
0
        else if (buf_is_empty(&env->spam))
1258
0
        {
1259
          /* match expr is empty; plug in null string if no existing tag */
1260
0
          buf_addstr(&env->spam, "");
1261
0
        }
1262
1263
0
        if (!buf_is_empty(&env->spam))
1264
0
          mutt_debug(LL_DEBUG5, "spam = %s\n", env->spam.data);
1265
0
      }
1266
0
    }
1267
1268
359k
    *p = '\0';
1269
359k
    p = mutt_str_skip_email_wsp(p + 1);
1270
359k
    if (*p == '\0')
1271
49.7k
      continue; /* skip empty header fields */
1272
1273
310k
    mutt_rfc822_parse_line(env, e, lines, name_len, p, user_hdrs, weed, true);
1274
310k
  }
1275
1276
109k
  buf_pool_release(&line);
1277
1278
109k
  if (e)
1279
109k
  {
1280
109k
    e->body->hdr_offset = e->offset;
1281
109k
    e->body->offset = ftello(fp);
1282
1283
109k
    rfc2047_decode_envelope(env);
1284
1285
109k
    if (e->received < 0)
1286
655
    {
1287
655
      mutt_debug(LL_DEBUG1, "resetting invalid received time to 0\n");
1288
655
      e->received = 0;
1289
655
    }
1290
1291
    /* check for missing or invalid date */
1292
109k
    if (e->date_sent <= 0)
1293
108k
    {
1294
108k
      mutt_debug(LL_DEBUG1, "no date found, using received time from msg separator\n");
1295
108k
      e->date_sent = e->received;
1296
108k
    }
1297
1298
#ifdef USE_AUTOCRYPT
1299
    const bool c_autocrypt = cs_subset_bool(NeoMutt->sub, "autocrypt");
1300
    if (c_autocrypt)
1301
    {
1302
      mutt_autocrypt_process_autocrypt_header(e, env);
1303
      /* No sense in taking up memory after the header is processed */
1304
      mutt_autocrypthdr_free(&env->autocrypt);
1305
    }
1306
#endif
1307
109k
  }
1308
1309
109k
  return env;
1310
109k
}
1311
1312
/**
1313
 * mutt_read_mime_header - Parse a MIME header
1314
 * @param fp      stream to read from
1315
 * @param digest  true if reading subparts of a multipart/digest
1316
 * @retval ptr New Body containing parsed structure
1317
 */
1318
struct Body *mutt_read_mime_header(FILE *fp, bool digest)
1319
29.0k
{
1320
29.0k
  if (!fp)
1321
0
    return NULL;
1322
1323
29.0k
  struct Body *b = mutt_body_new();
1324
29.0k
  struct Envelope *env = mutt_env_new();
1325
29.0k
  char *c = NULL;
1326
29.0k
  struct Buffer *buf = buf_pool_get();
1327
29.0k
  bool matched = false;
1328
1329
29.0k
  b->hdr_offset = ftello(fp);
1330
1331
29.0k
  b->encoding = ENC_7BIT; /* default from RFC1521 */
1332
29.0k
  b->type = digest ? TYPE_MESSAGE : TYPE_TEXT;
1333
29.0k
  b->disposition = DISP_INLINE;
1334
1335
75.1k
  while (mutt_rfc822_read_line(fp, buf) != 0)
1336
69.0k
  {
1337
69.0k
    const char *line = buf_string(buf);
1338
    /* Find the value of the current header */
1339
69.0k
    c = strchr(line, ':');
1340
69.0k
    if (c)
1341
46.1k
    {
1342
46.1k
      *c = '\0';
1343
46.1k
      c = mutt_str_skip_email_wsp(c + 1);
1344
46.1k
      if (*c == '\0')
1345
7.50k
      {
1346
7.50k
        mutt_debug(LL_DEBUG1, "skipping empty header field: %s\n", line);
1347
7.50k
        continue;
1348
7.50k
      }
1349
46.1k
    }
1350
22.9k
    else
1351
22.9k
    {
1352
22.9k
      mutt_debug(LL_DEBUG1, "bogus MIME header: %s\n", line);
1353
22.9k
      break;
1354
22.9k
    }
1355
1356
38.6k
    size_t plen = mutt_istr_startswith(line, "content-");
1357
38.6k
    if (plen != 0)
1358
18.9k
    {
1359
18.9k
      if (mutt_istr_equal("type", line + plen))
1360
16.4k
      {
1361
16.4k
        mutt_parse_content_type(c, b);
1362
16.4k
      }
1363
2.44k
      else if (mutt_istr_equal("language", line + plen))
1364
198
      {
1365
198
        parse_content_language(c, b);
1366
198
      }
1367
2.25k
      else if (mutt_istr_equal("transfer-encoding", line + plen))
1368
313
      {
1369
313
        b->encoding = mutt_check_encoding(c);
1370
313
      }
1371
1.93k
      else if (mutt_istr_equal("disposition", line + plen))
1372
377
      {
1373
377
        parse_content_disposition(c, b);
1374
377
      }
1375
1.56k
      else if (mutt_istr_equal("description", line + plen))
1376
208
      {
1377
208
        mutt_str_replace(&b->description, c);
1378
208
        rfc2047_decode(&b->description);
1379
208
      }
1380
1.35k
      else if (mutt_istr_equal("id", line + plen))
1381
882
      {
1382
        // strip <angle braces> from Content-ID: header
1383
882
        char *id = c;
1384
882
        int cid_len = mutt_str_len(c);
1385
882
        if (cid_len > 2)
1386
619
        {
1387
619
          if (id[0] == '<')
1388
211
          {
1389
211
            id++;
1390
211
            cid_len--;
1391
211
          }
1392
619
          if (id[cid_len - 1] == '>')
1393
288
            id[cid_len - 1] = '\0';
1394
619
        }
1395
882
        mutt_param_set(&b->parameter, "content-id", id);
1396
882
      }
1397
18.9k
    }
1398
19.6k
    else if ((plen = mutt_istr_startswith(line, "x-sun-")))
1399
4.46k
    {
1400
4.46k
      if (mutt_istr_equal("data-type", line + plen))
1401
199
      {
1402
199
        mutt_parse_content_type(c, b);
1403
199
      }
1404
4.26k
      else if (mutt_istr_equal("encoding-info", line + plen))
1405
274
      {
1406
274
        b->encoding = mutt_check_encoding(c);
1407
274
      }
1408
3.98k
      else if (mutt_istr_equal("content-lines", line + plen))
1409
3.27k
      {
1410
3.27k
        mutt_param_set(&b->parameter, "content-lines", c);
1411
3.27k
      }
1412
711
      else if (mutt_istr_equal("data-description", line + plen))
1413
202
      {
1414
202
        mutt_str_replace(&b->description, c);
1415
202
        rfc2047_decode(&b->description);
1416
202
      }
1417
4.46k
    }
1418
15.1k
    else
1419
15.1k
    {
1420
15.1k
      if (mutt_rfc822_parse_line(env, NULL, line, strlen(line), c, false, false, false))
1421
4.76k
      {
1422
4.76k
        matched = true;
1423
4.76k
      }
1424
15.1k
    }
1425
38.6k
  }
1426
29.0k
  b->offset = ftello(fp); /* Mark the start of the real data */
1427
29.0k
  if ((b->type == TYPE_TEXT) && !b->subtype)
1428
9.42k
    b->subtype = mutt_str_dup("plain");
1429
19.6k
  else if ((b->type == TYPE_MESSAGE) && !b->subtype)
1430
4.79k
    b->subtype = mutt_str_dup("rfc822");
1431
1432
29.0k
  buf_pool_release(&buf);
1433
1434
29.0k
  if (matched)
1435
1.31k
  {
1436
1.31k
    b->mime_headers = env;
1437
1.31k
    rfc2047_decode_envelope(b->mime_headers);
1438
1.31k
  }
1439
27.7k
  else
1440
27.7k
  {
1441
27.7k
    mutt_env_free(&env);
1442
27.7k
  }
1443
1444
29.0k
  return b;
1445
29.0k
}
1446
1447
/**
1448
 * mutt_is_message_type - Determine if a mime type matches a message or not
1449
 * @param type    Message type enum value
1450
 * @param subtype Message subtype
1451
 * @retval true  Type is message/news or message/rfc822
1452
 * @retval false Otherwise
1453
 */
1454
bool mutt_is_message_type(int type, const char *subtype)
1455
100k
{
1456
100k
  if (type != TYPE_MESSAGE)
1457
0
    return false;
1458
1459
100k
  subtype = NONULL(subtype);
1460
100k
  return (mutt_istr_equal(subtype, "rfc822") ||
1461
100k
          mutt_istr_equal(subtype, "news") || mutt_istr_equal(subtype, "global"));
1462
100k
}
1463
1464
/**
1465
 * parse_part - Parse a MIME part
1466
 * @param fp      File to read from
1467
 * @param b       Body to store the results in
1468
 * @param counter Number of parts processed so far
1469
 */
1470
static void parse_part(FILE *fp, struct Body *b, int *counter)
1471
135k
{
1472
135k
  if (!fp || !b)
1473
0
    return;
1474
1475
135k
  const char *bound = NULL;
1476
135k
  static unsigned short recurse_level = 0;
1477
1478
135k
  if (recurse_level >= MUTT_MIME_MAX_DEPTH)
1479
463
  {
1480
463
    mutt_debug(LL_DEBUG1, "recurse level too deep. giving up.\n");
1481
463
    return;
1482
463
  }
1483
135k
  recurse_level++;
1484
1485
135k
  switch (b->type)
1486
135k
  {
1487
6.48k
    case TYPE_MULTIPART:
1488
6.48k
      if (mutt_istr_equal(b->subtype, "x-sun-attachment"))
1489
4.77k
        bound = "--------";
1490
1.71k
      else
1491
1.71k
        bound = mutt_param_get(&b->parameter, "boundary");
1492
1493
6.48k
      if (!mutt_file_seek(fp, b->offset, SEEK_SET))
1494
0
      {
1495
0
        goto bail;
1496
0
      }
1497
6.48k
      b->parts = parse_multipart(fp, bound, b->offset + b->length,
1498
6.48k
                                 mutt_istr_equal("digest", b->subtype), counter);
1499
6.48k
      break;
1500
1501
100k
    case TYPE_MESSAGE:
1502
100k
      if (!b->subtype)
1503
0
        break;
1504
1505
100k
      if (!mutt_file_seek(fp, b->offset, SEEK_SET))
1506
0
      {
1507
0
        goto bail;
1508
0
      }
1509
100k
      if (mutt_is_message_type(b->type, b->subtype))
1510
99.3k
        b->parts = rfc822_parse_message(fp, b, counter);
1511
1.07k
      else if (mutt_istr_equal(b->subtype, "external-body"))
1512
883
        b->parts = mutt_read_mime_header(fp, 0);
1513
195
      else
1514
195
        goto bail;
1515
100k
      break;
1516
1517
100k
    default:
1518
28.6k
      goto bail;
1519
135k
  }
1520
1521
  /* try to recover from parsing error */
1522
106k
  if (!b->parts)
1523
2.03k
  {
1524
2.03k
    b->type = TYPE_TEXT;
1525
2.03k
    mutt_str_replace(&b->subtype, "plain");
1526
2.03k
  }
1527
135k
bail:
1528
135k
  recurse_level--;
1529
135k
}
1530
1531
/**
1532
 * parse_multipart - Parse a multipart structure
1533
 * @param fp       Stream to read from
1534
 * @param boundary Body separator
1535
 * @param end_off  Length of the multipart body (used when the final
1536
 *                 boundary is missing to avoid reading too far)
1537
 * @param digest   true if reading a multipart/digest
1538
 * @param counter  Number of parts processed so far
1539
 * @retval ptr New Body containing parsed structure
1540
 */
1541
static struct Body *parse_multipart(FILE *fp, const char *boundary,
1542
                                    LOFF_T end_off, bool digest, int *counter)
1543
6.48k
{
1544
6.48k
  if (!fp)
1545
0
    return NULL;
1546
1547
6.48k
  if (!boundary)
1548
279
  {
1549
279
    mutt_error(_("multipart message has no boundary parameter"));
1550
279
    return NULL;
1551
279
  }
1552
1553
6.20k
  char buf[1024] = { 0 };
1554
6.20k
  struct Body *head = NULL, *last = NULL, *new_body = NULL;
1555
6.20k
  bool final = false; /* did we see the ending boundary? */
1556
1557
6.20k
  const size_t blen = mutt_str_len(boundary);
1558
72.9k
  while ((ftello(fp) < end_off) && fgets(buf, sizeof(buf), fp))
1559
69.7k
  {
1560
69.7k
    const size_t len = mutt_str_len(buf);
1561
1562
69.7k
    const size_t crlf = ((len > 1) && (buf[len - 2] == '\r')) ? 1 : 0;
1563
1564
69.7k
    if ((buf[0] == '-') && (buf[1] == '-') && mutt_str_startswith(buf + 2, boundary))
1565
31.2k
    {
1566
31.2k
      if (last)
1567
25.6k
      {
1568
25.6k
        last->length = ftello(fp) - last->offset - len - 1 - crlf;
1569
25.6k
        if (last->parts && (last->parts->length == 0))
1570
0
          last->parts->length = ftello(fp) - last->parts->offset - len - 1 - crlf;
1571
        /* if the body is empty, we can end up with a -1 length */
1572
25.6k
        if (last->length < 0)
1573
11.2k
          last->length = 0;
1574
25.6k
      }
1575
1576
31.2k
      if (len > 0)
1577
31.2k
      {
1578
        /* Remove any trailing whitespace, up to the length of the boundary */
1579
50.1k
        for (size_t i = len - 1; isspace(buf[i]) && (i >= (blen + 2)); i--)
1580
18.8k
          buf[i] = '\0';
1581
31.2k
      }
1582
1583
      /* Check for the end boundary */
1584
31.2k
      if (mutt_str_equal(buf + blen + 2, "--"))
1585
389
      {
1586
389
        final = true;
1587
389
        break; /* done parsing */
1588
389
      }
1589
30.8k
      else if (buf[2 + blen] == '\0')
1590
28.2k
      {
1591
28.2k
        new_body = mutt_read_mime_header(fp, digest);
1592
28.2k
        if (!new_body)
1593
0
          break;
1594
1595
28.2k
        if (mutt_param_get(&new_body->parameter, "content-lines"))
1596
2.99k
        {
1597
2.99k
          int lines = 0;
1598
2.99k
          mutt_str_atoi(mutt_param_get(&new_body->parameter, "content-lines"), &lines);
1599
101k
          for (; lines > 0; lines--)
1600
100k
            if ((ftello(fp) >= end_off) || !fgets(buf, sizeof(buf), fp))
1601
1.42k
              break;
1602
2.99k
        }
1603
1604
        /* Consistency checking - catch bad attachment end boundaries */
1605
28.2k
        if (new_body->offset > end_off)
1606
1.28k
        {
1607
1.28k
          mutt_body_free(&new_body);
1608
1.28k
          break;
1609
1.28k
        }
1610
26.9k
        if (head)
1611
22.4k
        {
1612
22.4k
          last->next = new_body;
1613
22.4k
          last = new_body;
1614
22.4k
        }
1615
4.44k
        else
1616
4.44k
        {
1617
4.44k
          last = new_body;
1618
4.44k
          head = new_body;
1619
4.44k
        }
1620
1621
        /* It seems more intuitive to add the counter increment to
1622
         * parse_part(), but we want to stop the case where a multipart
1623
         * contains thousands of tiny parts before the memory and data
1624
         * structures are allocated.  */
1625
26.9k
        if (++(*counter) >= MUTT_MIME_MAX_PARTS)
1626
1.31k
          break;
1627
26.9k
      }
1628
31.2k
    }
1629
69.7k
  }
1630
1631
  /* in case of missing end boundary, set the length to something reasonable */
1632
6.20k
  if (last && (last->length == 0) && !final)
1633
3.66k
    last->length = end_off - last->offset;
1634
1635
  /* parse recursive MIME parts */
1636
33.1k
  for (last = head; last; last = last->next)
1637
26.9k
    parse_part(fp, last, counter);
1638
1639
6.20k
  return head;
1640
6.48k
}
1641
1642
/**
1643
 * rfc822_parse_message - Parse a Message/RFC822 body
1644
 * @param fp      Stream to read from
1645
 * @param parent  Info about the message/rfc822 body part
1646
 * @param counter Number of parts processed so far
1647
 * @retval ptr New Body containing parsed message
1648
 *
1649
 * @note This assumes that 'parent->length' has been set!
1650
 */
1651
static struct Body *rfc822_parse_message(FILE *fp, struct Body *parent, int *counter)
1652
99.3k
{
1653
99.3k
  if (!fp || !parent)
1654
0
    return NULL;
1655
1656
99.3k
  parent->email = email_new();
1657
99.3k
  parent->email->offset = ftello(fp);
1658
99.3k
  parent->email->env = mutt_rfc822_read_header(fp, parent->email, false, false);
1659
99.3k
  struct Body *msg = parent->email->body;
1660
1661
  /* ignore the length given in the content-length since it could be wrong
1662
   * and we already have the info to calculate the correct length */
1663
  /* if (msg->length == -1) */
1664
99.3k
  msg->length = parent->length - (msg->offset - parent->offset);
1665
1666
  /* if body of this message is empty, we can end up with a negative length */
1667
99.3k
  if (msg->length < 0)
1668
93.3k
    msg->length = 0;
1669
1670
99.3k
  parse_part(fp, msg, counter);
1671
99.3k
  return msg;
1672
99.3k
}
1673
1674
/**
1675
 * mutt_parse_mailto - Parse a mailto:// url
1676
 * @param[in]  env  Envelope to fill
1677
 * @param[out] body Body to
1678
 * @param[in]  src  String to parse
1679
 * @retval true  Success
1680
 * @retval false Error
1681
 */
1682
bool mutt_parse_mailto(struct Envelope *env, char **body, const char *src)
1683
0
{
1684
0
  if (!env || !src)
1685
0
    return false;
1686
1687
0
  struct Url *url = url_parse(src);
1688
0
  if (!url)
1689
0
    return false;
1690
1691
0
  if (url->host)
1692
0
  {
1693
    /* this is not a path-only URL */
1694
0
    url_free(&url);
1695
0
    return false;
1696
0
  }
1697
1698
0
  mutt_addrlist_parse(&env->to, url->path);
1699
1700
0
  struct UrlQuery *np;
1701
0
  STAILQ_FOREACH(np, &url->query_strings, entries)
1702
0
  {
1703
0
    const char *tag = np->name;
1704
0
    char *value = np->value;
1705
    /* Determine if this header field is on the allowed list.  Since NeoMutt
1706
     * interprets some header fields specially (such as
1707
     * "Attach: ~/.gnupg/secring.gpg"), care must be taken to ensure that
1708
     * only safe fields are allowed.
1709
     *
1710
     * RFC2368, "4. Unsafe headers"
1711
     * The user agent interpreting a mailto URL SHOULD choose not to create
1712
     * a message if any of the headers are considered dangerous; it may also
1713
     * choose to create a message with only a subset of the headers given in
1714
     * the URL.  */
1715
0
    if (mutt_list_match(tag, &MailToAllow))
1716
0
    {
1717
0
      if (mutt_istr_equal(tag, "body"))
1718
0
      {
1719
0
        if (body)
1720
0
          mutt_str_replace(body, value);
1721
0
      }
1722
0
      else
1723
0
      {
1724
0
        char *scratch = NULL;
1725
0
        size_t taglen = mutt_str_len(tag);
1726
1727
0
        mutt_str_asprintf(&scratch, "%s: %s", tag, value);
1728
0
        scratch[taglen] = 0; /* overwrite the colon as mutt_rfc822_parse_line expects */
1729
0
        value = mutt_str_skip_email_wsp(&scratch[taglen + 1]);
1730
0
        mutt_rfc822_parse_line(env, NULL, scratch, taglen, value, true, false, true);
1731
0
        FREE(&scratch);
1732
0
      }
1733
0
    }
1734
0
  }
1735
1736
  /* RFC2047 decode after the RFC822 parsing */
1737
0
  rfc2047_decode_envelope(env);
1738
1739
0
  url_free(&url);
1740
0
  return true;
1741
0
}
1742
1743
/**
 * mutt_parse_part - Parse a MIME part
 * @param fp File to read from
 * @param b  Body to store the results in
 *
 * Public wrapper around parse_part() that starts a fresh count of parts.
 */
void mutt_parse_part(FILE *fp, struct Body *b)
{
  int num_parts = 0;

  parse_part(fp, b, &num_parts);
}
1754
1755
/**
 * mutt_rfc822_parse_message - Parse a Message/RFC822 body
 * @param fp Stream to read from
 * @param b  Info about the message/rfc822 body part
 * @retval ptr New Body containing parsed message
 *
 * Public wrapper around rfc822_parse_message() that starts a fresh count of
 * parts.
 *
 * @note This assumes that 'b->length' has been set!
 */
struct Body *mutt_rfc822_parse_message(FILE *fp, struct Body *b)
{
  int num_parts = 0;

  struct Body *msg = rfc822_parse_message(fp, b, &num_parts);
  return msg;
}
1769
1770
/**
1771
 * mutt_parse_multipart - Parse a multipart structure
1772
 * @param fp       Stream to read from
1773
 * @param boundary Body separator
1774
 * @param end_off  Length of the multipart body (used when the final
1775
 *                 boundary is missing to avoid reading too far)
1776
 * @param digest   true if reading a multipart/digest
1777
 * @retval ptr New Body containing parsed structure
1778
 */
1779
struct Body *mutt_parse_multipart(FILE *fp, const char *boundary, LOFF_T end_off, bool digest)
1780
0
{
1781
0
  int counter = 0;
1782
1783
0
  return parse_multipart(fp, boundary, end_off, digest, &counter);
1784
0
}