Coverage Report

Created: 2025-08-03 06:27

/src/dovecot/src/lib-mail/rfc822-parser.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (c) 2005-2018 Dovecot authors, see the included COPYING file */
2
3
#include "lib.h"
4
#include "str.h"
5
#include "punycode.h"
6
#include "strescape.h"
7
#include "rfc822-parser.h"
8
9
/*
10
   atext        =       ALPHA / DIGIT / ; Any character except controls,
11
      "!" / "#" /     ;  SP, and specials.
12
      "$" / "%" /     ;  Used for atoms
13
      "&" / "'" /
14
      "*" / "+" /
15
      "-" / "/" /
16
      "=" / "?" /
17
      "^" / "_" /
18
      "`" / "{" /
19
      "|" / "}" /
20
      "~"
21
22
  MIME:
23
24
  token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
25
        or tspecials>
26
  tspecials :=  "(" / ")" / "<" / ">" / "@" /
27
    "," / ";" / ":" / "\" / <">
28
    "/" / "[" / "]" / "?" / "="
29
30
  So a token is the same as a dot-atom, except that it also stops at '/', '?' and '='.
31
*/
32
33
/* Character classification table, indexed by byte value:
     0 = not an atext character (controls, SP, specials, DEL)
     1 = atext punctuation
     2 = letters and digits; every byte >= 128 is also marked 2
     4 = atext characters that are also MIME tspecials: slash, '=' and '?' */
unsigned char rfc822_atext_chars[256] = {
  /* 0-31: control characters */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

  /* 32-47: SP and punctuation up to the slash */
  0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 4,
  /* 48-63: digits, then ':' ';' '<' '=' '>' '?' */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 4, 0, 4,
  /* 64-79: '@', then 'A'..'O' */
  0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 80-95: 'P'..'Z', then '[' backslash ']' '^' '_' */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 1,
  /* 96-111: backtick, then 'a'..'o' */
  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 112-127: 'p'..'z', then '{' '|' '}' '~' and DEL */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0,

  /* 128-255: all non-ASCII bytes */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
54
55
void rfc822_parser_init(struct rfc822_parser_context *ctx,
56
      const unsigned char *data, size_t size,
57
      string_t *last_comment)
58
0
{
59
0
  i_zero(ctx);
60
0
  ctx->data = data;
61
0
  ctx->end = data + size;
62
0
  ctx->last_comment = last_comment;
63
0
}
64
65
int rfc822_skip_comment(struct rfc822_parser_context *ctx)
66
0
{
67
0
  const unsigned char *start;
68
0
  size_t len;
69
0
  int level = 1;
70
71
0
  i_assert(*ctx->data == '(');
72
73
0
  if (ctx->last_comment != NULL)
74
0
    str_truncate(ctx->last_comment, 0);
75
76
0
  start = ++ctx->data;
77
0
  for (; ctx->data < ctx->end; ctx->data++) {
78
0
    switch (*ctx->data) {
79
0
    case '\0':
80
0
      if (ctx->last_comment != NULL &&
81
0
          ctx->nul_replacement_str != NULL) {
82
0
        str_append_data(ctx->last_comment, start,
83
0
            ctx->data - start);
84
0
        str_append(ctx->last_comment,
85
0
             ctx->nul_replacement_str);
86
0
        start = ctx->data + 1;
87
0
      }
88
0
      break;
89
0
    case '(':
90
0
      level++;
91
0
      break;
92
0
    case ')':
93
0
      if (--level == 0) {
94
0
        if (ctx->last_comment != NULL) {
95
0
          str_append_data(ctx->last_comment, start,
96
0
              ctx->data - start);
97
0
        }
98
0
        ctx->data++;
99
0
        return ctx->data < ctx->end ? 1 : 0;
100
0
      }
101
0
      break;
102
0
    case '\n':
103
      /* folding whitespace, remove the (CR)LF */
104
0
      if (ctx->last_comment == NULL)
105
0
        break;
106
0
      len = ctx->data - start;
107
0
      if (len > 0 && start[len-1] == '\r')
108
0
        len--;
109
0
      str_append_data(ctx->last_comment, start, len);
110
0
      start = ctx->data + 1;
111
0
      break;
112
0
    case '\\':
113
0
      ctx->data++;
114
0
      if (ctx->data >= ctx->end)
115
0
        return -1;
116
117
0
      if (*ctx->data == '\r' || *ctx->data == '\n' ||
118
0
          *ctx->data == '\0') {
119
        /* quoted-pair doesn't allow CR/LF/NUL.
120
           They are part of the obs-qp though, so don't
121
           return them as error. */
122
0
        ctx->data--;
123
0
        break;
124
0
      }
125
0
      if (ctx->last_comment != NULL) {
126
0
        str_append_data(ctx->last_comment, start,
127
0
            ctx->data - start - 1);
128
0
      }
129
0
      start = ctx->data;
130
0
      break;
131
0
    }
132
0
  }
133
134
  /* missing ')' */
135
0
  return -1;
136
0
}
137
138
int rfc822_skip_lwsp(struct rfc822_parser_context *ctx)
139
0
{
140
0
  for (; ctx->data < ctx->end;) {
141
0
    if (*ctx->data == ' ' || *ctx->data == '\t' ||
142
0
        *ctx->data == '\r' || *ctx->data == '\n') {
143
0
      ctx->data++;
144
0
      continue;
145
0
    }
146
147
0
    if (*ctx->data != '(')
148
0
      break;
149
150
0
    if (rfc822_skip_comment(ctx) < 0)
151
0
      return -1;
152
0
  }
153
0
  return ctx->data < ctx->end ? 1 : 0;
154
0
}
155
156
/* Parse an atom and append it to str, then skip any trailing LWSP.

     atom            = [CFWS] 1*atext [CFWS]
     atext           =
       ; Any character except controls, SP, and specials.

   Returns -1 if the input is empty or doesn't begin with an atext
   character, 0 if the atom ran up to the end of the input, and otherwise
   the result of rfc822_skip_lwsp(). */
int rfc822_parse_atom(struct rfc822_parser_context *ctx, string_t *str)
{
  const unsigned char *atom_begin = ctx->data;

  if (atom_begin >= ctx->end || !IS_ATEXT(*atom_begin))
    return -1;

  /* the first character is already known to be atext */
  do {
    ctx->data++;
  } while (ctx->data < ctx->end && IS_ATEXT(*ctx->data));

  str_append_data(str, atom_begin, ctx->data - atom_begin);
  if (ctx->data >= ctx->end)
    return 0;
  return rfc822_skip_lwsp(ctx);
}
179
180
/* Parse a dot-atom and append it to str.

     dot-atom        = [CFWS] dot-atom-text [CFWS]
     dot-atom-text   = 1*atext *("." 1*atext)

     atext           =
       ; Any character except controls, SP, and specials.

   For RFC-822 compatibility LWSP is allowed around '.'; it is skipped and
   not copied to str.

   Returns -1 if the input doesn't start with atext, if a '.' isn't followed
   by atext, or if skipping LWSP after a '.' fails or reaches the end of the
   input. Returns 0 if the dot-atom ran up to the end of the input, 1 if
   something other than '.' follows it, and the (<= 0) result of
   rfc822_skip_lwsp() if skipping LWSP after an atom part ended the
   parsing. */
int rfc822_parse_dot_atom(struct rfc822_parser_context *ctx, string_t *str)
{
  const unsigned char *part_begin;
  int lwsp_ret;

  if (ctx->data >= ctx->end || !IS_ATEXT(*ctx->data))
    return -1;

  part_begin = ctx->data++;
  for (;;) {
    /* consume the rest of the current atext run */
    while (ctx->data < ctx->end && IS_ATEXT(*ctx->data))
      ctx->data++;
    if (ctx->data >= ctx->end)
      break;

    /* empty part: a '.' wasn't followed by atext */
    if (ctx->data == part_begin)
      return -1;
    str_append_data(str, part_begin, ctx->data - part_begin);

    lwsp_ret = rfc822_skip_lwsp(ctx);
    if (lwsp_ret <= 0)
      return lwsp_ret;

    if (*ctx->data != '.')
      return 1;

    ctx->data++;
    str_append_c(str, '.');

    if (rfc822_skip_lwsp(ctx) <= 0)
      return -1;
    part_begin = ctx->data;
  }

  /* input ended inside an atext run */
  i_assert(part_begin != ctx->data);
  str_append_data(str, part_begin, ctx->data - part_begin);
  return 0;
}
225
226
/* Parse a MIME token and append it to str, then skip any trailing LWSP.
   The token consists of the characters accepted by IS_ATEXT_NON_TSPECIAL()
   plus '.'. The token may be empty, in which case nothing is appended.

   Returns 0 if the end of the input was reached while scanning, otherwise
   the result of rfc822_skip_lwsp(). */
int rfc822_parse_mime_token(struct rfc822_parser_context *ctx, string_t *str)
{
  const unsigned char *token_begin = ctx->data;

  while (ctx->data < ctx->end &&
         (IS_ATEXT_NON_TSPECIAL(*ctx->data) || *ctx->data == '.'))
    ctx->data++;

  str_append_data(str, token_begin, ctx->data - token_begin);
  if (ctx->data >= ctx->end)
    return 0;
  return rfc822_skip_lwsp(ctx);
}
241
242
/* Parse a quoted-string and append its unquoted contents to str, then skip
   any trailing LWSP. ctx->data must point at the opening '"'.

   While copying, (CR)LF line folds are removed, the backslash of a
   quoted-pair is dropped while the escaped character is kept, and NUL
   bytes are replaced with ctx->nul_replacement_str when that is set.

   Returns the result of rfc822_skip_lwsp() once the closing '"' is found,
   or -1 if the string is unterminated (missing '"' or a trailing
   backslash). */
int rfc822_parse_quoted_string(struct rfc822_parser_context *ctx, string_t *str)
{
  /* first input byte that hasn't been copied to str yet */
  const unsigned char *pending;

  i_assert(ctx->data < ctx->end);
  i_assert(*ctx->data == '"');

  pending = ++ctx->data;
  while (ctx->data < ctx->end) {
    const unsigned char ch = *ctx->data;

    if (ch == '"') {
      str_append_data(str, pending, ctx->data - pending);
      ctx->data++;
      return rfc822_skip_lwsp(ctx);
    }

    if (ch == '\0') {
      if (ctx->nul_replacement_str != NULL) {
        str_append_data(str, pending, ctx->data - pending);
        str_append(str, ctx->nul_replacement_str);
        pending = ctx->data + 1;
      }
    } else if (ch == '\n') {
      /* folding whitespace, remove the (CR)LF */
      size_t chunk_len = ctx->data - pending;

      if (chunk_len > 0 && pending[chunk_len-1] == '\r')
        chunk_len--;
      str_append_data(str, pending, chunk_len);
      pending = ctx->data + 1;
    } else if (ch == '\\') {
      ctx->data++;
      if (ctx->data >= ctx->end)
        return -1;

      if (*ctx->data == '\r' || *ctx->data == '\n' ||
          *ctx->data == '\0') {
        /* quoted-pair doesn't allow CR/LF/NUL.
           They are part of the obs-qp though, so don't
           return them as error. Handle the character
           normally on the next round. */
        continue;
      }
      /* copy everything before the backslash; the escaped
         character becomes the start of the pending data */
      str_append_data(str, pending, ctx->data - pending - 1);
      pending = ctx->data;
    }
    ctx->data++;
  }

  /* missing '"' */
  return -1;
}
294
295
static int
296
rfc822_parse_atom_or_dot(struct rfc822_parser_context *ctx, string_t *str)
297
0
{
298
0
  const unsigned char *start;
299
300
  /*
301
     atom            = [CFWS] 1*atext [CFWS]
302
     atext           =
303
       ; Any character except controls, SP, and specials.
304
305
     The difference between this function and rfc822_parse_dot_atom()
306
     is that this doesn't just silently skip over all the whitespace.
307
  */
308
0
  for (start = ctx->data; ctx->data < ctx->end; ctx->data++) {
309
0
    if (IS_ATEXT(*ctx->data) || *ctx->data == '.')
310
0
      continue;
311
312
0
    str_append_data(str, start, ctx->data - start);
313
0
    return rfc822_skip_lwsp(ctx);
314
0
  }
315
316
0
  str_append_data(str, start, ctx->data - start);
317
0
  return 0;
318
0
}
319
320
/* Parse a phrase and append it to str, with a single space between
   consecutive words.

     phrase     = 1*word / obs-phrase
     word       = atom / quoted-string
     obs-phrase = word *(word / "." / CFWS)

   Returns 0 if the input is already at its end, -1 if the phrase begins
   with '.', the (<= 0) result of the word parser if a word ends the
   parsing, and otherwise the result of the final rfc822_skip_lwsp(). */
int rfc822_parse_phrase(struct rfc822_parser_context *ctx, string_t *str)
{
  int ret;
  int more_words;

  if (ctx->data >= ctx->end)
    return 0;
  if (*ctx->data == '.')
    return -1;

  do {
    ret = *ctx->data == '"' ?
      rfc822_parse_quoted_string(ctx, str) :
      rfc822_parse_atom_or_dot(ctx, str);
    if (ret <= 0)
      return ret;

    /* another word follows if the next character can start one */
    more_words = IS_ATEXT(*ctx->data) || *ctx->data == '"' ||
      *ctx->data == '.';
    if (more_words)
      str_append_c(str, ' ');
  } while (more_words);

  return rfc822_skip_lwsp(ctx);
}
351
352
static int
353
rfc822_parse_domain_literal(struct rfc822_parser_context *ctx, string_t *str)
354
0
{
355
0
  const unsigned char *start;
356
0
  size_t len;
357
358
  /*
359
     domain-literal  = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
360
     dcontent        = dtext / quoted-pair
361
     dtext           = NO-WS-CTL /     ; Non white space controls
362
           %d33-90 /       ; The rest of the US-ASCII
363
           %d94-126        ;  characters not including "[",
364
               ;  "]", or "\"
365
  */
366
0
  i_assert(ctx->data < ctx->end);
367
0
  i_assert(*ctx->data == '[');
368
369
0
  for (start = ctx->data++; ctx->data < ctx->end; ctx->data++) {
370
0
    switch (*ctx->data) {
371
0
    case '\0':
372
0
      if (ctx->nul_replacement_str != NULL) {
373
0
        str_append_data(str, start, ctx->data - start);
374
0
        str_append(str, ctx->nul_replacement_str);
375
0
        start = ctx->data + 1;
376
0
      }
377
0
      break;
378
0
    case '[':
379
      /* not allowed */
380
0
      return -1;
381
0
    case ']':
382
0
      str_append_data(str, start, ctx->data - start + 1);
383
0
      ctx->data++;
384
0
      return rfc822_skip_lwsp(ctx);
385
0
    case '\n':
386
      /* folding whitespace, remove the (CR)LF */
387
0
      len = ctx->data - start;
388
0
      if (len > 0 && start[len-1] == '\r')
389
0
        len--;
390
0
      str_append_data(str, start, len);
391
0
      start = ctx->data + 1;
392
0
      break;
393
0
    case '\\':
394
      /* note: the '\' is preserved in the output */
395
0
      ctx->data++;
396
0
      if (ctx->data >= ctx->end)
397
0
        return -1;
398
399
0
      if (*ctx->data == '\r' || *ctx->data == '\n' ||
400
0
          *ctx->data == '\0') {
401
        /* quoted-pair doesn't allow CR/LF/NUL.
402
           They are part of the obs-qp though, so don't
403
           return them as error. */
404
0
        str_append_data(str, start, ctx->data - start);
405
0
        start = ctx->data;
406
0
        ctx->data--;
407
0
        break;
408
0
      }
409
0
    }
410
0
  }
411
412
  /* missing ']' */
413
0
  return -1;
414
0
}
415
416
/* Decode the punycode ("xn--" prefixed) labels of a dot-separated domain
   name and append the outcome to result.

   input, len: the domain text; only the bytes [input, input + len) are
               examined, so the input doesn't need to be NUL-terminated
   result:     output is appended here

   Labels without the "xn--" prefix and labels that punycode_decode()
   rejects are copied unchanged. Every '.' separator is preserved. */
void rfc822_decode_punycode(const char *input, size_t len, string_t *result)
{
  string_t *decoded = t_str_new(64);
  const char *pos = input;
  const char *end = CONST_PTR_OFFSET(input, len);

  while (pos < end) {
    /* bounded search, so the scan can't run past input + len */
    const char *delim = memchr(pos, '.', end - pos);
    const char *label_end = delim != NULL ? delim : end;
    size_t label_len = label_end - pos;

    if (label_len >= 4 && memcmp(pos, "xn--", 4) == 0) {
      const char *value = pos + 4;

      /* Decode into a scratch buffer, so result only receives
         the label once decoding has succeeded. */
      str_truncate(decoded, 0);
      if (punycode_decode(value, label_end - value, decoded) < 0) {
        /* Consider it as data */
        str_append_data(result, pos, label_len);
      } else {
        str_append_str(result, decoded);
      }
    } else {
      /* No punycode prefix */
      str_append_data(result, pos, label_len);
    }

    if (delim == NULL) {
      /* that was the last label */
      break;
    }
    str_append_c(result, '.');
    pos = delim + 1;
  }
}

/* Parse the domain part of an address and append it to str.
   ctx->data must point at the '@' preceding the domain.

     domain          = dot-atom / domain-literal / obs-domain
     domain-literal  = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
     obs-domain      = atom *("." atom)

   Returns -1 if nothing parseable follows the '@', otherwise the result of
   rfc822_parse_domain_literal() or rfc822_parse_dot_atom(). */
int rfc822_parse_domain(struct rfc822_parser_context *ctx, string_t *str)
{
  i_assert(ctx->data < ctx->end);
  i_assert(*ctx->data == '@');
  ctx->data++;

  if (rfc822_skip_lwsp(ctx) <= 0)
    return -1;

  if (*ctx->data == '[')
    return rfc822_parse_domain_literal(ctx, str);
  else {
#ifdef EXPERIMENTAL_MAIL_UTF8
    /* Remember where the domain begins in str. This has to be
       done before the dot-atom is appended, or there would be
       nothing left to decode. */
    size_t start_pos = str_len(str);
#endif
    int ret = rfc822_parse_dot_atom(ctx, str);
#ifdef EXPERIMENTAL_MAIL_UTF8
    /* NOTE(review): decoding is done only when the domain ran up
       to the end of the input (ret == 0) - confirm whether ret == 1
       should be decoded as well. */
    if (ret == 0) {
      string_t *u = t_str_new(64);
      const char *data = t_strndup(str_data(str) + start_pos,
                 str_len(str) - start_pos);
      rfc822_decode_punycode(data, strlen(data), u);
      /* replace the raw domain with its decoded form */
      str_truncate(str, start_pos);
      str_append_str(str, u);
    }
#endif
    return ret;
  }
}
476
477
/* Parse a "type/subtype" content type and append it to str. LWSP is
   allowed around the '/', but it isn't copied to str.

   Returns -1 if the value is invalid: empty input, an empty or missing
   main type, a missing '/', an empty subtype, or a subtype followed by
   something other than ';' or the end of the input. Otherwise returns the
   result of parsing the subtype token: 0 if the input ended there, 1 if
   more input follows.

   On the failures detected after the main type was successfully parsed,
   str is truncated back to its original length. The failure paths before
   that return without truncating str. */
int rfc822_parse_content_type(struct rfc822_parser_context *ctx, string_t *str)
{
  const size_t orig_len = str->used;
  size_t subtype_pos;
  int ret;

  if (rfc822_skip_lwsp(ctx) <= 0)
    return -1;

  /* get main type, require at least one byte */
  if (rfc822_parse_mime_token(ctx, str) <= 0)
    return -1;
  if (str->used == orig_len)
    return -1;

  /* skip over "/" */
  if (*ctx->data != '/')
    goto invalid;
  ctx->data++;
  if (rfc822_skip_lwsp(ctx) <= 0)
    goto invalid;
  str_append_c(str, '/');

  /* get subtype, require at least one byte,
     and check the next separator to avoid accepting
     invalid values. */
  subtype_pos = str->used;
  ret = rfc822_parse_mime_token(ctx, str);
  if (ret < 0)
    goto invalid;
  if (str->used == subtype_pos)
    goto invalid;
  if (ctx->data != ctx->end && *ctx->data != ';')
    goto invalid;
  return ret;

invalid:
  str_truncate(str, orig_len);
  return -1;
}
513
514
/* Parse the next ";key=value" parameter of a Content-* header.

     .. := *(";" parameter)
     parameter := attribute "=" value
     attribute := token
     value := token / quoted-string

   key_r: set to NULL at entry, and to the parsed key when 1 is returned
   value: truncated at entry, then filled with the parameter's value

   Returns 0 if there is no more input, 1 if a parameter was parsed (its
   value may be empty), and -1 if the input is invalid. */
int rfc822_parse_content_param(struct rfc822_parser_context *ctx,
             const char **key_r, string_t *value)
{
  string_t *key;
  int ret;

  *key_r = NULL;
  str_truncate(value, 0);

  if (ctx->data >= ctx->end)
    return 0;
  if (*ctx->data != ';')
    return -1;
  ctx->data++;

  if (rfc822_skip_lwsp(ctx) <= 0)
    return -1;

  key = t_str_new(64);
  if (rfc822_parse_mime_token(ctx, key) <= 0)
    return -1;

  if (*ctx->data != '=')
    return -1;
  ctx->data++;

  ret = rfc822_skip_lwsp(ctx);
  if (ret > 0) {
    /* rfc822_skip_lwsp() returned 1, so ctx->data is still
       inside the input */
    switch (*ctx->data) {
    case '"':
      ret = rfc822_parse_quoted_string(ctx, value);
      break;
    case '=':
      /* workaround for broken input:
         name==?utf-8?b?...?= */
      for (; ctx->data < ctx->end; ctx->data++) {
        const unsigned char ch = *ctx->data;

        if (ch == ';' || ch == ' ' || ch == '\t' ||
            ch == '\r' || ch == '\n')
          break;
        str_append_c(value, ch);
      }
      break;
    default:
      ret = rfc822_parse_mime_token(ctx, value);
      break;
    }
  }
  /* else: broken / no value */

  *key_r = str_c(key);
  return ret < 0 ? -1 : 1;
}