Coverage Report

Created: 2023-06-07 06:18

/src/dovecot/src/lib-mail/message-parser.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
2
3
#include "lib.h"
4
#include "array.h"
5
#include "str.h"
6
#include "istream.h"
7
#include "rfc822-parser.h"
8
#include "rfc2231-parser.h"
9
#include "message-parser-private.h"
10
11
/* A header-callback pointer whose value is NULL. It is not referenced
   anywhere else in this file; presumably callers use it as a typed
   "no callback" argument - TODO confirm against the header. */
message_part_header_callback_t *null_message_part_header_callback = NULL;

/* Block parsers that are referenced (called or stored into
   ctx->parse_next_block) before their definitions further down. */
static int
parse_next_header_init(struct message_parser_ctx *ctx,
		       struct message_block *block_r);
static int
parse_next_body_to_boundary(struct message_parser_ctx *ctx,
			    struct message_block *block_r);
static int
parse_next_body_to_eof(struct message_parser_ctx *ctx,
		       struct message_block *block_r);
19
20
static struct message_boundary *
21
boundary_find(struct message_boundary *boundaries,
22
        const unsigned char *data, size_t len, bool trailing_dashes)
23
523k
{
24
523k
  struct message_boundary *best = NULL;
25
26
  /* As MIME spec says: search from latest one to oldest one so that we
27
     don't break if the same boundary is used in nested parts. Also the
28
     full message line doesn't have to match the boundary, only the
29
     beginning. However, if there are multiple prefixes whose beginning
30
     matches, use the longest matching one. */
31
1.48M
  while (boundaries != NULL) {
32
1.42M
    if (boundaries->len <= len &&
33
1.42M
        memcmp(boundaries->boundary, data, boundaries->len) == 0 &&
34
1.42M
        (best == NULL || best->len < boundaries->len)) {
35
517k
      best = boundaries;
36
      /* If we see "foo--", it could either mean that there
37
         is a boundary named "foo" that ends now or there's
38
         a boundary "foo--" which continues. */
39
517k
      if (best->len == len ||
40
517k
          (best->len == len-2 && trailing_dashes)) {
41
        /* This is exactly the wanted boundary. There
42
           can't be a better one. */
43
468k
        break;
44
468k
      }
45
517k
    }
46
47
956k
    boundaries = boundaries->next;
48
956k
  }
49
50
523k
  return best;
51
523k
}
52
53
static void parse_body_add_block(struct message_parser_ctx *ctx,
54
         struct message_block *block)
55
450k
{
56
450k
  unsigned int missing_cr_count = 0;
57
450k
  const unsigned char *cur, *next, *data = block->data;
58
59
450k
  i_assert(block->size > 0);
60
61
450k
  block->hdr = NULL;
62
63
  /* check if we have NULs */
64
450k
  if (memchr(data, '\0', block->size) != NULL)
65
6.98k
    ctx->part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
66
67
  /* count number of lines and missing CRs */
68
450k
  if (*data == '\n') {
69
205k
    ctx->part->body_size.lines++;
70
205k
    if (ctx->last_chr != '\r')
71
205k
      missing_cr_count++;
72
205k
  }
73
74
450k
  cur = data + 1;
75
639k
  while ((next = memchr(cur, '\n', block->size - (cur - data))) != NULL) {
76
188k
    ctx->part->body_size.lines++;
77
188k
    if (next[-1] != '\r')
78
168k
      missing_cr_count++;
79
80
188k
    cur = next + 1;
81
188k
  }
82
450k
  ctx->last_chr = data[block->size - 1];
83
450k
  ctx->skip += block->size;
84
85
450k
  ctx->part->body_size.physical_size += block->size;
86
450k
  ctx->part->body_size.virtual_size += block->size + missing_cr_count;
87
450k
}
88
89
int message_parser_read_more(struct message_parser_ctx *ctx,
90
           struct message_block *block_r, bool *full_r)
91
1.23M
{
92
1.23M
  int ret;
93
94
1.23M
  if (ctx->skip > 0) {
95
450k
    i_stream_skip(ctx->input, ctx->skip);
96
450k
    ctx->skip = 0;
97
450k
  }
98
99
1.23M
  *full_r = FALSE;
100
1.23M
  ret = i_stream_read_bytes(ctx->input, &block_r->data,
101
1.23M
          &block_r->size, ctx->want_count + 1);
102
1.23M
  if (ret <= 0) {
103
11.1k
    switch (ret) {
104
0
    case 0:
105
0
      if (!ctx->input->eof) {
106
0
        i_assert(!ctx->input->blocking);
107
0
        return 0;
108
0
      }
109
0
      break;
110
11.1k
    case -1:
111
11.1k
      i_assert(ctx->input->eof ||
112
11.1k
         ctx->input->stream_errno != 0);
113
11.1k
      ctx->eof = TRUE;
114
11.1k
      if (block_r->size != 0) {
115
        /* EOF, but we still have some data.
116
           return it. */
117
1.76k
        return 1;
118
1.76k
      }
119
9.39k
      return -1;
120
0
    case -2:
121
0
      *full_r = TRUE;
122
0
      break;
123
0
    default:
124
0
      i_unreached();
125
11.1k
    }
126
11.1k
  }
127
128
1.22M
  if (!*full_r) {
129
    /* reset number of wanted characters if we actually got them */
130
1.22M
    ctx->want_count = 1;
131
1.22M
  }
132
1.22M
  return 1;
133
1.23M
}
134
135
static void
136
message_part_append(struct message_parser_ctx *ctx)
137
300k
{
138
300k
  struct message_part *parent = ctx->part;
139
300k
  struct message_part *part;
140
141
300k
  i_assert(!ctx->preparsed);
142
300k
  i_assert(parent != NULL);
143
300k
  i_assert((parent->flags & (MESSAGE_PART_FLAG_MULTIPART |
144
300k
           MESSAGE_PART_FLAG_MESSAGE_RFC822)) != 0);
145
146
300k
  part = p_new(ctx->part_pool, struct message_part, 1);
147
300k
  part->parent = parent;
148
149
  /* set child position */
150
300k
  part->physical_pos =
151
300k
    parent->physical_pos +
152
300k
    parent->body_size.physical_size +
153
300k
    parent->header_size.physical_size;
154
155
  /* add to parent's linked list */
156
300k
  *ctx->next_part = part;
157
  /* update the parent's end-of-linked-list pointer */
158
300k
  struct message_part **next_part = &part->next;
159
300k
  array_push_back(&ctx->next_part_stack, &next_part);
160
  /* This part is now the new parent for the next message_part_append()
161
     call. Its linked list begins with the children pointer. */
162
300k
  ctx->next_part = &part->children;
163
164
300k
  ctx->part = part;
165
300k
  ctx->nested_parts_count++;
166
300k
  ctx->total_parts_count++;
167
300k
  i_assert(ctx->nested_parts_count < ctx->max_nested_mime_parts);
168
300k
  i_assert(ctx->total_parts_count <= ctx->max_total_mime_parts);
169
300k
}
170
171
static void message_part_finish(struct message_parser_ctx *ctx)
172
300k
{
173
300k
  struct message_part **const *parent_next_partp;
174
175
300k
  if (!ctx->preparsed) {
176
300k
    i_assert(ctx->nested_parts_count > 0);
177
300k
    ctx->nested_parts_count--;
178
179
300k
    parent_next_partp = array_back(&ctx->next_part_stack);
180
300k
    array_pop_back(&ctx->next_part_stack);
181
300k
    ctx->next_part = *parent_next_partp;
182
300k
  }
183
184
300k
  message_size_add(&ctx->part->parent->body_size, &ctx->part->body_size);
185
300k
  message_size_add(&ctx->part->parent->body_size, &ctx->part->header_size);
186
300k
  ctx->part->parent->children_count += 1 + ctx->part->children_count;
187
300k
  ctx->part = ctx->part->parent;
188
300k
}
189
190
static void message_boundary_free(struct message_boundary *b)
191
10.1k
{
192
10.1k
  i_free(b->boundary);
193
10.1k
  i_free(b);
194
10.1k
}
195
196
static void
197
boundary_remove_until(struct message_parser_ctx *ctx,
198
          struct message_boundary *boundary)
199
226k
{
200
236k
  while (ctx->boundaries != boundary) {
201
10.1k
    struct message_boundary *cur = ctx->boundaries;
202
203
10.1k
    i_assert(cur != NULL);
204
10.1k
    ctx->boundaries = cur->next;
205
10.1k
    message_boundary_free(cur);
206
207
10.1k
  }
208
226k
  ctx->boundaries = boundary;
209
226k
}
210
211
static void parse_next_body_multipart_init(struct message_parser_ctx *ctx)
212
10.1k
{
213
10.1k
  struct message_boundary *b;
214
215
10.1k
  b = i_new(struct message_boundary, 1);
216
10.1k
  b->part = ctx->part;
217
10.1k
  b->boundary = ctx->last_boundary;
218
10.1k
  ctx->last_boundary = NULL;
219
10.1k
  b->len = strlen(b->boundary);
220
221
10.1k
  b->next = ctx->boundaries;
222
10.1k
  ctx->boundaries = b;
223
10.1k
}
224
225
/* Body of a message/rfc822 part: the embedded message becomes a new
   child part and parsing continues with that child's headers. Returns
   whatever the header parser returns. */
static int parse_next_body_message_rfc822_init(struct message_parser_ctx *ctx,
					       struct message_block *block_r)
{
	int ret;

	message_part_append(ctx);
	ret = parse_next_header_init(ctx, block_r);
	return ret;
}
231
232
static int
233
boundary_line_find(struct message_parser_ctx *ctx,
234
       const unsigned char *data, size_t size, bool full,
235
       struct message_boundary **boundary_r)
236
926k
{
237
926k
  *boundary_r = NULL;
238
239
926k
  if (size < 2) {
240
458
    i_assert(!full);
241
242
458
    if (ctx->input->eof)
243
327
      return -1;
244
131
    ctx->want_count = 2;
245
131
    return 0;
246
458
  }
247
248
926k
  if (data[0] != '-' || data[1] != '-') {
249
    /* not a boundary, just skip this line */
250
376k
    return -1;
251
376k
  }
252
253
549k
  if (ctx->total_parts_count >= ctx->max_total_mime_parts) {
254
    /* can't add any more MIME parts. just stop trying to find
255
       more boundaries. */
256
25.2k
    ctx->part->flags |= MESSAGE_PART_FLAG_OVERFLOW;
257
25.2k
    return -1;
258
25.2k
  }
259
260
  /* need to find the end of line */
261
524k
  data += 2;
262
524k
  size -= 2;
263
524k
  const unsigned char *lf_pos = memchr(data, '\n', size);
264
524k
  if (lf_pos == NULL &&
265
524k
      size+2 < BOUNDARY_END_MAX_LEN &&
266
524k
      !ctx->input->eof && !full) {
267
    /* no LF found */
268
435
    ctx->want_count = BOUNDARY_END_MAX_LEN;
269
435
    return 0;
270
435
  }
271
523k
  size_t find_size = size;
272
523k
  bool trailing_dashes = FALSE;
273
274
523k
  if (lf_pos != NULL) {
275
522k
    find_size = lf_pos - data;
276
522k
    if (find_size > 0 && data[find_size-1] == '\r')
277
1.39k
      find_size--;
278
522k
    if (find_size > 2 && data[find_size-1] == '-' &&
279
522k
        data[find_size-2] == '-')
280
5.14k
      trailing_dashes = TRUE;
281
522k
  } else if (find_size > BOUNDARY_END_MAX_LEN)
282
57
    find_size = BOUNDARY_END_MAX_LEN;
283
284
523k
  *boundary_r = boundary_find(ctx->boundaries, data, find_size,
285
523k
            trailing_dashes);
286
523k
  if (*boundary_r == NULL)
287
7.45k
    return -1;
288
289
516k
  (*boundary_r)->epilogue_found =
290
516k
    size >= (*boundary_r)->len + 2 &&
291
516k
    memcmp(data + (*boundary_r)->len, "--", 2) == 0;
292
516k
  return 1;
293
523k
}
294
295
static int parse_next_mime_header_init(struct message_parser_ctx *ctx,
296
               struct message_block *block_r)
297
218k
{
298
218k
  message_part_append(ctx);
299
218k
  ctx->part->flags |= MESSAGE_PART_FLAG_IS_MIME;
300
301
218k
  return parse_next_header_init(ctx, block_r);
302
218k
}
303
304
static int parse_next_body_skip_boundary_line(struct message_parser_ctx *ctx,
305
                struct message_block *block_r)
306
221k
{
307
221k
  const unsigned char *ptr;
308
221k
  int ret;
309
221k
  bool full;
310
311
221k
  if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
312
274
    return ret;
313
314
221k
  ptr = memchr(block_r->data, '\n', block_r->size);
315
221k
  if (ptr == NULL) {
316
153
    parse_body_add_block(ctx, block_r);
317
153
    if (block_r->size > 0 &&
318
153
        (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
319
0
      return 1;
320
153
    return 0;
321
153
  }
322
323
  /* found the LF */
324
220k
  block_r->size = (ptr - block_r->data) + 1;
325
220k
  parse_body_add_block(ctx, block_r);
326
327
220k
  if (ctx->boundaries == NULL || ctx->boundaries->part != ctx->part) {
328
    /* epilogue */
329
2.57k
    if (ctx->boundaries != NULL)
330
2.55k
      ctx->parse_next_block = parse_next_body_to_boundary;
331
28
    else
332
28
      ctx->parse_next_block = parse_next_body_to_eof;
333
218k
  } else {
334
    /* a new MIME part begins */
335
218k
    ctx->parse_next_block = parse_next_mime_header_init;
336
218k
  }
337
220k
  if (block_r->size > 0 &&
338
220k
      (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
339
0
    return 1;
340
220k
  return ctx->parse_next_block(ctx, block_r);
341
220k
}
342
343
static int parse_part_finish(struct message_parser_ctx *ctx,
344
           struct message_boundary *boundary,
345
           struct message_block *block_r, bool first_line)
346
221k
{
347
221k
  size_t line_size;
348
221k
  size_t boundary_len = boundary->len;
349
221k
  bool boundary_epilogue_found = boundary->epilogue_found;
350
351
221k
  i_assert(ctx->last_boundary == NULL);
352
353
  /* get back to parent MIME part, summing the child MIME part sizes
354
     into parent's body sizes */
355
515k
  while (ctx->part != boundary->part) {
356
294k
    message_part_finish(ctx);
357
294k
    i_assert(ctx->part != NULL);
358
294k
  }
359
360
221k
  if (boundary->epilogue_found) {
361
    /* this boundary isn't needed anymore */
362
2.60k
    boundary_remove_until(ctx, boundary->next);
363
218k
  } else {
364
    /* forget about the boundaries we possibly skipped */
365
218k
    boundary_remove_until(ctx, boundary);
366
218k
  }
367
368
  /* the boundary itself should already be in buffer. add that. */
369
221k
  block_r->data = i_stream_get_data(ctx->input, &block_r->size);
370
221k
  i_assert(block_r->size >= ctx->skip);
371
221k
  block_r->data += ctx->skip;
372
  /* [[\r]\n]--<boundary>[--] */
373
221k
  if (first_line)
374
211k
    line_size = 0;
375
9.91k
  else if (block_r->data[0] == '\r') {
376
3.85k
    i_assert(block_r->data[1] == '\n');
377
3.85k
    line_size = 2;
378
6.06k
  } else {
379
6.06k
    i_assert(block_r->data[0] == '\n');
380
6.06k
    line_size = 1;
381
6.06k
  }
382
221k
  line_size += 2 + boundary_len + (boundary_epilogue_found ? 2 : 0);
383
221k
  i_assert(block_r->size >= ctx->skip + line_size);
384
221k
  block_r->size = line_size;
385
221k
  parse_body_add_block(ctx, block_r);
386
387
221k
  ctx->parse_next_block = parse_next_body_skip_boundary_line;
388
389
221k
  if ((ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
390
0
    return 1;
391
221k
  return ctx->parse_next_block(ctx, block_r);
392
221k
}
393
394
static int parse_next_body_to_boundary(struct message_parser_ctx *ctx,
395
               struct message_block *block_r)
396
230k
{
397
230k
  struct message_boundary *boundary = NULL;
398
230k
  const unsigned char *data, *cur, *next, *end;
399
230k
  size_t boundary_start;
400
230k
  int ret;
401
230k
  bool full;
402
403
230k
  if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
404
1.33k
    return ret;
405
406
229k
  data = block_r->data;
407
229k
  if (ctx->last_chr == '\n') {
408
    /* handle boundary in first line of message. alternatively
409
       it's an empty line. */
410
226k
    ret = boundary_line_find(ctx, block_r->data,
411
226k
           block_r->size, full, &boundary);
412
226k
    if (ret >= 0) {
413
211k
      return ret == 0 ? 0 :
414
211k
        parse_part_finish(ctx, boundary, block_r, TRUE);
415
211k
    }
416
226k
  }
417
418
229k
  i_assert(block_r->size > 0);
419
18.3k
  boundary_start = 0;
420
421
  /* skip to beginning of the next line. the first line was
422
     handled already. */
423
18.3k
  cur = data; end = data + block_r->size;
424
126k
  while ((next = memchr(cur, '\n', end - cur)) != NULL) {
425
126k
    cur = next + 1;
426
427
126k
    boundary_start = next - data;
428
126k
    if (next > data && next[-1] == '\r')
429
22.5k
      boundary_start--;
430
431
126k
    if (boundary_start != 0) {
432
      /* we can at least skip data until the first [CR]LF.
433
         input buffer can't be full anymore. */
434
113k
      full = FALSE;
435
113k
    }
436
437
126k
    ret = boundary_line_find(ctx, cur, end - cur, full, &boundary);
438
126k
    if (ret >= 0) {
439
      /* found / need more data */
440
17.7k
      if (ret == 0 && boundary_start == 0)
441
60
        ctx->want_count += cur - block_r->data;
442
17.7k
      break;
443
17.7k
    }
444
126k
  }
445
446
18.3k
  if (next != NULL) {
447
    /* found / need more data */
448
17.7k
    i_assert(ret >= 0);
449
17.7k
    i_assert(!(ret == 0 && full));
450
17.7k
  } else if (boundary_start == 0) {
451
    /* no linefeeds in this block. we can just skip it. */
452
421
    ret = 0;
453
421
    if (block_r->data[block_r->size-1] == '\r' && !ctx->eof) {
454
      /* this may be the beginning of the \r\n--boundary */
455
3
      block_r->size--;
456
3
    }
457
421
    boundary_start = block_r->size;
458
421
  } else {
459
    /* the boundary wasn't found from this data block,
460
       we'll need more data. */
461
98
    ret = 0;
462
98
    ctx->want_count = (block_r->size - boundary_start) + 1;
463
98
  }
464
465
18.3k
  if (ret > 0 || (ret == 0 && !ctx->eof)) {
466
    /* a) we found the boundary
467
       b) we need more data and haven't reached EOF yet
468
       so leave CR+LF + last line to buffer */
469
17.8k
    block_r->size = boundary_start;
470
17.8k
  }
471
18.3k
  if (block_r->size != 0) {
472
8.33k
    parse_body_add_block(ctx, block_r);
473
474
8.33k
    if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
475
8.33k
        (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) == 0)
476
0
      return 0;
477
478
8.33k
    return 1;
479
8.33k
  }
480
9.97k
  return ret <= 0 ? ret :
481
9.97k
    parse_part_finish(ctx, boundary, block_r, FALSE);
482
18.3k
}
483
484
static int parse_next_body_to_eof(struct message_parser_ctx *ctx,
485
          struct message_block *block_r)
486
3.51k
{
487
3.51k
  bool full;
488
3.51k
  int ret;
489
490
3.51k
  if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
491
3.37k
    return ret;
492
493
143
  parse_body_add_block(ctx, block_r);
494
495
143
  if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
496
143
      (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) == 0)
497
0
    return 0;
498
499
143
  return 1;
500
143
}
501
502
static void parse_content_type(struct message_parser_ctx *ctx,
503
             struct message_header_line *hdr)
504
54.7k
{
505
54.7k
  struct rfc822_parser_context parser;
506
54.7k
  const char *const *results, *suffix;
507
54.7k
  string_t *content_type;
508
54.7k
  int ret;
509
510
54.7k
  if (ctx->part_seen_content_type)
511
1.10k
    return;
512
53.6k
  ctx->part_seen_content_type = TRUE;
513
514
53.6k
  rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
515
53.6k
  rfc822_skip_lwsp(&parser);
516
517
53.6k
  content_type = t_str_new(64);
518
53.6k
  ret = rfc822_parse_content_type(&parser, content_type);
519
520
53.6k
  if (strcasecmp(str_c(content_type), "message/rfc822") == 0)
521
332
    ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
522
53.3k
  else if (str_begins_icase(str_c(content_type), "text", &suffix) &&
523
53.3k
     (suffix[0] == '\0' || suffix[0] == '/'))
524
396
    ctx->part->flags |= MESSAGE_PART_FLAG_TEXT;
525
52.9k
  else if (str_begins_icase(str_c(content_type), "multipart/", &suffix)) {
526
39.2k
    ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART;
527
528
39.2k
    if (strcasecmp(suffix, "digest") == 0)
529
3.46k
      ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART_DIGEST;
530
39.2k
  }
531
532
53.6k
  if (ret < 0 ||
533
53.6k
      (ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 ||
534
53.6k
      ctx->last_boundary != NULL) {
535
14.4k
    rfc822_parser_deinit(&parser);
536
14.4k
    return;
537
14.4k
  }
538
539
39.2k
  rfc2231_parse(&parser, &results);
540
7.02M
  for (; *results != NULL; results += 2) {
541
7.01M
    if (strcasecmp(results[0], "boundary") == 0) {
542
      /* truncate excessively long boundaries */
543
22.9k
      i_free(ctx->last_boundary);
544
22.9k
      ctx->last_boundary =
545
22.9k
        i_strndup(results[1], BOUNDARY_STRING_MAX_LEN);
546
22.9k
      break;
547
22.9k
    }
548
7.01M
  }
549
39.2k
  rfc822_parser_deinit(&parser);
550
39.2k
}
551
552
static bool block_is_at_eoh(const struct message_block *block)
553
777k
{
554
777k
  if (block->size < 1)
555
0
    return FALSE;
556
777k
  if (block->data[0] == '\n')
557
55.0k
    return TRUE;
558
721k
  if (block->data[0] == '\r') {
559
3.09k
    if (block->size < 2)
560
13
      return FALSE;
561
3.07k
    if (block->data[1] == '\n')
562
2.02k
      return TRUE;
563
3.07k
  }
564
719k
  return FALSE;
565
721k
}
566
567
static bool parse_too_many_nested_mime_parts(struct message_parser_ctx *ctx)
568
103k
{
569
103k
  return ctx->nested_parts_count+1 >= ctx->max_nested_mime_parts;
570
103k
}
571
572
#define MUTEX_FLAGS \
573
  (MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_MULTIPART)
574
575
static int parse_next_header(struct message_parser_ctx *ctx,
576
           struct message_block *block_r)
577
781k
{
578
781k
  struct message_part *part = ctx->part;
579
781k
  struct message_header_line *hdr;
580
781k
  struct message_boundary *boundary;
581
781k
  bool full;
582
781k
  int ret;
583
584
781k
  if ((ret = message_parser_read_more(ctx, block_r, &full)) == 0)
585
0
    return ret;
586
587
781k
  if (ret > 0 && block_is_at_eoh(block_r) &&
588
781k
      ctx->last_boundary != NULL &&
589
781k
      (part->flags & MESSAGE_PART_FLAG_IS_MIME) != 0) {
590
    /* we are at the end of headers and we've determined that we're
591
       going to start a multipart. add the boundary already here
592
       at this point so we can reliably determine whether the
593
       "\n--boundary" belongs to us or to a previous boundary.
594
       this is a problem if the boundary prefixes are identical,
595
       because MIME requires only the prefix to match. */
596
21.6k
    if (!parse_too_many_nested_mime_parts(ctx)) {
597
10.1k
      parse_next_body_multipart_init(ctx);
598
10.1k
      ctx->multipart = TRUE;
599
11.4k
    } else {
600
11.4k
      part->flags |= MESSAGE_PART_FLAG_OVERFLOW;
601
11.4k
      part->flags &= ENUM_NEGATE(MESSAGE_PART_FLAG_MULTIPART);
602
11.4k
    }
603
21.6k
  }
604
605
  /* before parsing the header see if we can find a --boundary from here.
606
     we're guaranteed to be at the beginning of the line here. */
607
781k
  if (ret > 0) {
608
777k
    ret = ctx->boundaries == NULL ? -1 :
609
777k
      boundary_line_find(ctx, block_r->data,
610
574k
             block_r->size, full, &boundary);
611
777k
    if (ret > 0 && boundary->part == ctx->part) {
612
      /* our own body begins with our own --boundary.
613
         we don't want to handle that yet. */
614
8.35k
      ret = -1;
615
8.35k
    }
616
777k
  }
617
781k
  if (ret < 0) {
618
    /* no boundary */
619
501k
    ret = message_parse_header_next(ctx->hdr_parser_ctx, &hdr);
620
501k
    if (ret == 0 || (ret < 0 && ctx->input->stream_errno != 0)) {
621
0
      ctx->want_count = i_stream_get_data_size(ctx->input) + 1;
622
0
      return ret;
623
0
    }
624
501k
  } else if (ret == 0) {
625
    /* need more data */
626
382
    return 0;
627
279k
  } else {
628
    /* boundary found. stop parsing headers here. The previous
629
       [CR]LF belongs to the MIME boundary though. */
630
279k
    if (ctx->prev_hdr_newline_size > 0) {
631
59.5k
      i_assert(ctx->part->header_size.lines > 0);
632
      /* remove the newline size from the MIME header */
633
59.5k
      ctx->part->header_size.lines--;
634
59.5k
      ctx->part->header_size.physical_size -=
635
59.5k
        ctx->prev_hdr_newline_size;
636
59.5k
      ctx->part->header_size.virtual_size -= 2;
637
      /* add the newline size to the parent's body */
638
59.5k
      ctx->part->parent->body_size.lines++;
639
59.5k
      ctx->part->parent->body_size.physical_size +=
640
59.5k
        ctx->prev_hdr_newline_size;
641
59.5k
      ctx->part->parent->body_size.virtual_size += 2;
642
59.5k
    }
643
279k
    hdr = NULL;
644
279k
  }
645
646
780k
  if (hdr != NULL) {
647
475k
    if (hdr->eoh)
648
51.2k
      ;
649
424k
    else if (strcasecmp(hdr->name, "Mime-Version") == 0) {
650
      /* it's MIME. Content-* headers are valid */
651
267
      part->flags |= MESSAGE_PART_FLAG_IS_MIME;
652
424k
    } else if (strcasecmp(hdr->name, "Content-Type") == 0) {
653
64.9k
      if ((ctx->flags &
654
64.9k
           MESSAGE_PARSER_FLAG_MIME_VERSION_STRICT) == 0)
655
64.9k
        part->flags |= MESSAGE_PART_FLAG_IS_MIME;
656
657
64.9k
      if (hdr->continues)
658
10.1k
        hdr->use_full_value = TRUE;
659
109k
      else T_BEGIN {
660
109k
        parse_content_type(ctx, hdr);
661
109k
      } T_END;
662
64.9k
    }
663
664
475k
    block_r->hdr = hdr;
665
475k
    block_r->size = 0;
666
475k
    ctx->prev_hdr_newline_size = hdr->no_newline ? 0 :
667
475k
      (hdr->crlf_newline ? 2 : 1);
668
475k
    return 1;
669
475k
  }
670
671
  /* end of headers */
672
305k
  if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
673
    /* It's not MIME. Reset everything we found from
674
       Content-Type. */
675
81.2k
    i_assert(!ctx->multipart);
676
81.2k
    part->flags = 0;
677
81.2k
  }
678
305k
  i_free(ctx->last_boundary);
679
680
305k
  if (!ctx->part_seen_content_type ||
681
305k
      (part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
682
251k
    if (part->parent != NULL &&
683
251k
        (part->parent->flags &
684
250k
         MESSAGE_PART_FLAG_MULTIPART_DIGEST) != 0) {
685
      /* when there's no content-type specified and we're
686
         below multipart/digest, assume message/rfc822
687
         content-type */
688
81.8k
      part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
689
169k
    } else {
690
      /* otherwise we default to text/plain */
691
169k
      part->flags |= MESSAGE_PART_FLAG_TEXT;
692
169k
    }
693
251k
  }
694
695
305k
  if (message_parse_header_has_nuls(ctx->hdr_parser_ctx))
696
15.4k
    part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
697
305k
  message_parse_header_deinit(&ctx->hdr_parser_ctx);
698
699
305k
  i_assert((part->flags & MUTEX_FLAGS) != MUTEX_FLAGS);
700
701
305k
  ctx->last_chr = '\n';
702
305k
  if (ctx->multipart) {
703
10.1k
    i_assert(ctx->last_boundary == NULL);
704
10.1k
    ctx->multipart = FALSE;
705
10.1k
    ctx->parse_next_block = parse_next_body_to_boundary;
706
295k
  } else if ((part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) == 0) {
707
    /* Not message/rfc822 */
708
212k
    if (ctx->boundaries != NULL)
709
209k
      ctx->parse_next_block = parse_next_body_to_boundary;
710
3.34k
    else
711
3.34k
      ctx->parse_next_block = parse_next_body_to_eof;
712
212k
  } else if (!parse_too_many_nested_mime_parts(ctx) &&
713
82.1k
       ctx->total_parts_count < ctx->max_total_mime_parts) {
714
    /* message/rfc822 - not reached MIME part limits yet */
715
81.9k
    ctx->parse_next_block = parse_next_body_message_rfc822_init;
716
81.9k
  } else {
717
    /* message/rfc822 - already reached MIME part limits */
718
205
    part->flags |= MESSAGE_PART_FLAG_OVERFLOW;
719
205
    part->flags &= ENUM_NEGATE(MESSAGE_PART_FLAG_MESSAGE_RFC822);
720
205
    if (ctx->boundaries != NULL)
721
204
      ctx->parse_next_block = parse_next_body_to_boundary;
722
1
    else
723
1
      ctx->parse_next_block = parse_next_body_to_eof;
724
205
  }
725
726
305k
  ctx->want_count = 1;
727
728
  /* return empty block as end of headers */
729
305k
  block_r->hdr = NULL;
730
305k
  block_r->size = 0;
731
305k
  return 1;
732
305k
}
733
734
static int parse_next_header_init(struct message_parser_ctx *ctx,
735
          struct message_block *block_r)
736
305k
{
737
305k
  i_assert(ctx->hdr_parser_ctx == NULL);
738
739
305k
  ctx->hdr_parser_ctx =
740
305k
    message_parse_header_init(ctx->input, &ctx->part->header_size,
741
305k
            ctx->hdr_flags);
742
305k
  ctx->part_seen_content_type = FALSE;
743
305k
  ctx->prev_hdr_newline_size = 0;
744
745
305k
  ctx->parse_next_block = parse_next_header;
746
305k
  return parse_next_header(ctx, block_r);
747
305k
}
748
749
struct message_parser_ctx *
750
message_parser_init_int(struct istream *input,
751
      const struct message_parser_settings *set)
752
4.98k
{
753
4.98k
  struct message_parser_ctx *ctx;
754
755
4.98k
  ctx = i_new(struct message_parser_ctx, 1);
756
4.98k
  ctx->hdr_flags = set->hdr_flags;
757
4.98k
  ctx->flags = set->flags;
758
4.98k
  ctx->max_nested_mime_parts = set->max_nested_mime_parts != 0 ?
759
0
    set->max_nested_mime_parts :
760
4.98k
    MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS;
761
4.98k
  ctx->max_total_mime_parts = set->max_total_mime_parts != 0 ?
762
0
    set->max_total_mime_parts :
763
4.98k
    MESSAGE_PARSER_DEFAULT_MAX_TOTAL_MIME_PARTS;
764
4.98k
  ctx->input = input;
765
4.98k
  i_stream_ref(input);
766
4.98k
  return ctx;
767
4.98k
}
768
769
struct message_parser_ctx *
770
message_parser_init(pool_t part_pool, struct istream *input,
771
        const struct message_parser_settings *set)
772
4.98k
{
773
4.98k
  struct message_parser_ctx *ctx;
774
775
4.98k
  ctx = message_parser_init_int(input, set);
776
4.98k
  ctx->part_pool = part_pool;
777
4.98k
  ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1);
778
4.98k
  ctx->next_part = &ctx->part->children;
779
4.98k
  ctx->parse_next_block = parse_next_header_init;
780
4.98k
  ctx->total_parts_count = 1;
781
4.98k
  i_array_init(&ctx->next_part_stack, 4);
782
4.98k
  return ctx;
783
4.98k
}
784
785
void message_parser_deinit(struct message_parser_ctx **_ctx,
786
        struct message_part **parts_r)
787
4.98k
{
788
4.98k
  const char *error;
789
790
4.98k
  i_assert((**_ctx).preparsed == FALSE);
791
4.98k
  if (message_parser_deinit_from_parts(_ctx, parts_r, &error) < 0)
792
0
    i_panic("message_parser_deinit_from_parts: %s", error);
793
4.98k
}
794
795
int message_parser_deinit_from_parts(struct message_parser_ctx **_ctx,
796
             struct message_part **parts_r,
797
             const char **error_r)
798
4.98k
{
799
4.98k
        struct message_parser_ctx *ctx = *_ctx;
800
4.98k
  int ret = ctx->broken_reason != NULL ? -1 : 0;
801
802
4.98k
  *_ctx = NULL;
803
4.98k
  *parts_r = ctx->parts;
804
4.98k
  *error_r = ctx->broken_reason;
805
806
4.98k
  if (ctx->hdr_parser_ctx != NULL)
807
0
    message_parse_header_deinit(&ctx->hdr_parser_ctx);
808
4.98k
  if (ctx->part != NULL) {
809
    /* If the whole message has been parsed, the parts are
810
       usually finished in message_parser_parse_next_block().
811
       However, it's possible that the caller finishes reading
812
       through the istream without calling
813
       message_parser_parse_next_block() afterwards. In that case
814
       we still need to finish these parts. */
815
4.98k
    while (ctx->part->parent != NULL)
816
0
      message_part_finish(ctx);
817
4.98k
  }
818
4.98k
  boundary_remove_until(ctx, NULL);
819
4.98k
  i_assert(ctx->nested_parts_count == 0);
820
821
4.98k
  i_stream_unref(&ctx->input);
822
4.98k
  array_free(&ctx->next_part_stack);
823
4.98k
  i_free(ctx->last_boundary);
824
4.98k
  i_free(ctx);
825
4.98k
  i_assert(ret < 0 || *parts_r != NULL);
826
4.98k
  return ret;
827
4.98k
}
828
829
int message_parser_parse_next_block(struct message_parser_ctx *ctx,
830
            struct message_block *block_r)
831
794k
{
832
794k
  int ret;
833
794k
  bool eof = FALSE, full;
834
835
794k
  i_zero(block_r);
836
837
794k
  while ((ret = ctx->parse_next_block(ctx, block_r)) == 0) {
838
602
    ret = message_parser_read_more(ctx, block_r, &full);
839
602
    if (ret == 0) {
840
0
      i_assert(!ctx->input->blocking);
841
0
      return 0;
842
0
    }
843
602
    if (ret == -1) {
844
153
      i_assert(!eof);
845
153
      eof = TRUE;
846
153
    }
847
602
  }
848
849
794k
  block_r->part = ctx->part;
850
851
794k
  if (ret < 0 && ctx->part != NULL) {
852
    /* Successful EOF or unexpected failure */
853
4.98k
    i_assert(ctx->input->eof || ctx->input->closed ||
854
4.98k
       ctx->input->stream_errno != 0 ||
855
4.98k
       ctx->broken_reason != NULL);
856
10.6k
    while (ctx->part->parent != NULL)
857
5.62k
      message_part_finish(ctx);
858
4.98k
  }
859
860
794k
  if (block_r->size == 0) {
861
    /* data isn't supposed to be read, so make sure it's NULL */
862
785k
    block_r->data = NULL;
863
785k
  }
864
794k
  return ret;
865
794k
}
866
867
#undef message_parser_parse_header
868
void message_parser_parse_header(struct message_parser_ctx *ctx,
869
         struct message_size *hdr_size,
870
         message_part_header_callback_t *callback,
871
         void *context)
872
0
{
873
0
  struct message_block block;
874
0
  int ret;
875
876
0
  while ((ret = message_parser_parse_next_block(ctx, &block)) > 0) {
877
0
    T_BEGIN {
878
0
      callback(block.part, block.hdr, context);
879
0
    } T_END;
880
881
0
    if (block.hdr == NULL)
882
0
      break;
883
0
  }
884
0
  i_assert(ret != 0);
885
0
  i_assert(ctx->part != NULL);
886
887
0
  if (ret < 0) T_BEGIN {
888
    /* well, can't return error so fake end of headers */
889
0
    callback(ctx->part, NULL, context);
890
0
  } T_END;
891
892
0
        *hdr_size = ctx->part->header_size;
893
0
}
894
895
#undef message_parser_parse_body
896
void message_parser_parse_body(struct message_parser_ctx *ctx,
897
             message_part_header_callback_t *hdr_callback,
898
             void *context)
899
0
{
900
0
  struct message_block block;
901
0
  int ret;
902
903
0
  while ((ret = message_parser_parse_next_block(ctx, &block)) > 0) {
904
0
    if (block.size == 0 && hdr_callback != NULL) T_BEGIN {
905
0
      hdr_callback(block.part, block.hdr, context);
906
0
    } T_END;
907
0
  }
908
0
  i_assert(ret != 0);
909
0
}