Coverage Report

Created: 2024-10-02 06:29

/src/dovecot/src/lib-mail/message-header-parser.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
2
3
#include "lib.h"
4
#include "buffer.h"
5
#include "istream.h"
6
#include "str.h"
7
#include "strfuncs.h"
8
#include "unichar.h"
9
#include "message-size.h"
10
#include "message-header-parser.h"
11
12
/* RFC 5322 2.1.1 and 2.2 */
13
62.6k
#define MESSAGE_HEADER_NAME_MAX_LEN 1000
14
15
/* Parser state kept between message_parse_header_next() calls. */
struct message_header_parser_ctx {
16
  /* line state filled in by message_parse_header_next() and handed to
     the caller through hdr_r */
  struct message_header_line line;
17
18
  /* stream the headers are read from; referenced in init and
     unreferenced in deinit */
  struct istream *input;
19
  /* optional (may be NULL) counters updated while parsing */
  struct message_size *hdr_size;
20
21
  /* storage behind line.name: the name, a NUL, then the "middle" bytes */
  string_t *name;
22
  /* copy of the current header's value / accumulated full value */
  buffer_t *value_buf;
23
24
  /* upper limit for the value bytes kept for the header block */
  size_t header_block_max_size;
25
  /* value bytes already accounted for by earlier headers */
  size_t header_block_total_size;
26
27
  enum message_header_parser_flags flags;
28
  /* discard input up to the next newline (the parser's own comment says
     this is for a line with a huge header name).
     NOTE(review): never set to TRUE in the code visible here. */
  bool skip_line:1;
29
  /* set once a NUL byte has been seen in the header data */
  bool has_nuls:1;
30
};
31
32
struct message_header_parser_ctx *
33
message_parse_header_init(struct istream *input, struct message_size *hdr_size,
34
        enum message_header_parser_flags flags)
35
244k
{
36
244k
  struct message_header_parser_ctx *ctx;
37
38
244k
  ctx = i_new(struct message_header_parser_ctx, 1);
39
244k
  ctx->input = input;
40
244k
  ctx->hdr_size = hdr_size;
41
244k
  ctx->name = str_new(default_pool, 128);
42
244k
  ctx->flags = flags;
43
244k
  ctx->value_buf = buffer_create_dynamic(default_pool, 4096);
44
244k
  ctx->header_block_max_size = MESSAGE_HEADER_BLOCK_DEFAULT_MAX_SIZE;
45
244k
  i_stream_ref(input);
46
47
244k
  if (hdr_size != NULL)
48
244k
    i_zero(hdr_size);
49
244k
  return ctx;
50
244k
}
51
52
void
53
message_parse_header_set_limit(struct message_header_parser_ctx *parser,
54
             size_t header_block_max_size)
55
0
{
56
0
  parser->header_block_max_size = header_block_max_size;
57
0
}
58
59
void
60
message_parse_header_lower_limit(struct message_header_parser_ctx *parser,
61
         size_t header_block_max_size)
62
659k
{
63
659k
  if (header_block_max_size < parser->header_block_max_size)
64
0
    message_parse_header_set_limit(parser, header_block_max_size);
65
659k
}
66
67
void message_parse_header_deinit(struct message_header_parser_ctx **_ctx)
68
244k
{
69
244k
  struct message_header_parser_ctx *ctx = *_ctx;
70
71
244k
  i_stream_unref(&ctx->input);
72
244k
  buffer_free(&ctx->value_buf);
73
244k
  str_free(&ctx->name);
74
244k
  i_free(ctx);
75
76
244k
  *_ctx = NULL;
77
244k
}
78
79
/* Parse the next header line from ctx->input.

   Returns 1 and points *hdr_r at ctx->line when a line was parsed (this
   includes the empty end-of-headers line, which has line->eoh set),
   0 when the stream has no more data yet but isn't at EOF, and -1 when
   the end of headers was already returned earlier or the stream
   ended/failed before any bytes of a new line were available.
   *hdr_r is NULL unless 1 is returned.

   The returned line lives inside ctx, so it is overwritten by the next
   call. */
int message_parse_header_next(struct message_header_parser_ctx *ctx,
80
            struct message_header_line **hdr_r)
81
659k
{
82
659k
        struct message_header_line *line = &ctx->line;
83
659k
  const unsigned char *msg;
84
659k
  size_t i, size, startpos, colon_pos, parse_size, skip = 0;
85
659k
  int ret;
86
659k
  bool continued, continues, last_no_newline, last_crlf;
87
659k
  bool no_newline, crlf_newline;
88
89
659k
  *hdr_r = NULL;
90
659k
  if (line->eoh)
91
23.3k
    return -1;
92
93
636k
  if (line->continues)
94
24.4k
    colon_pos = 0;
95
612k
  else {
96
    /* new header line */
97
612k
    line->name_offset = ctx->input->v_offset;
98
612k
    colon_pos = UINT_MAX;
99
612k
    /* the previous header's buffered value is added to the block
       total before the buffer is reused */
    ctx->header_block_total_size += ctx->value_buf->used;
100
612k
    buffer_set_used_size(ctx->value_buf, 0);
101
612k
  }
102
103
636k
  no_newline = FALSE;
104
636k
  crlf_newline = FALSE;
105
636k
  continued = line->continues;
106
636k
  continues = FALSE;
107
108
643k
  for (startpos = 0;;) {
109
643k
    /* NOTE(review): presumably startpos+2 is the minimum amount
       wanted (bytes scanned so far + one new byte + one lookahead
       byte) - confirm against the istream API */
    ret = i_stream_read_bytes(ctx->input, &msg, &size, startpos+2);
110
643k
    if (ret >= 0) {
111
      /* we want to know one byte in advance to find out
112
         if it's multiline header */
113
632k
      parse_size = size == 0 ? 0 : size-1;
114
632k
    } else {
115
11.7k
      parse_size = size;
116
11.7k
    }
117
118
643k
    /* nothing new to scan since the previous iteration */
    if (ret <= 0 && startpos == parse_size) {
119
7.85k
      if (ret == -1) {
120
7.85k
        if (startpos > 0) {
121
          /* header ended unexpectedly. */
122
3.72k
          no_newline = TRUE;
123
3.72k
          skip = startpos;
124
3.72k
          break;
125
3.72k
        }
126
        /* error / EOF with no bytes */
127
7.85k
        i_assert(skip == 0);
128
4.13k
        return -1;
129
4.13k
      }
130
131
0
      if (size > 0 && !ctx->skip_line && !continued &&
132
0
          (msg[0] == '\n' ||
133
0
           (msg[0] == '\r' && size > 1 && msg[1] == '\n'))) {
134
        /* end of headers - this mostly happens just
135
           with mbox where headers are read separately
136
           from body */
137
0
        size = 0;
138
0
        if (ctx->hdr_size != NULL)
139
0
          ctx->hdr_size->lines++;
140
0
        if (msg[0] == '\r') {
141
0
          skip = 2;
142
0
          crlf_newline = TRUE;
143
0
        } else {
144
0
          skip = 1;
145
0
          if (ctx->hdr_size != NULL)
146
0
            ctx->hdr_size->virtual_size++;
147
0
        }
148
0
        break;
149
0
      }
150
0
      if (ret == 0 && !ctx->input->eof) {
151
        /* stream is nonblocking - need more data */
152
0
        i_assert(skip == 0);
153
0
        return 0;
154
0
      }
155
0
      i_assert(size > 0);
156
157
      /* a) line is larger than input buffer
158
         b) header ended unexpectedly */
159
0
      if (ret == -2) {
160
        /* go back to last LWSP if found. */
161
0
        size_t min_pos = !continued ? colon_pos : 0;
162
0
        for (i = size-1; i > min_pos; i--) {
163
0
          if (IS_LWSP(msg[i])) {
164
0
            size = i;
165
0
            break;
166
0
          }
167
0
        }
168
0
        if (i == min_pos && (msg[size-1] == '\r' ||
169
0
                 msg[size-1] == '\n')) {
170
          /* we may or may not have a full header,
171
             but we don't know until we get the
172
             next character. leave out the
173
             linefeed and finish the header on
174
             the next run. */
175
0
          size--;
176
0
          if (size > 0 && msg[size-1] == '\r')
177
0
            size--;
178
0
        }
179
        /* the buffer really has to be more than 2 to
180
           avoid CRLF looping forever */
181
0
        i_assert(size > 0);
182
183
0
        continues = TRUE;
184
0
      }
185
0
      no_newline = TRUE;
186
0
      skip = size;
187
0
      break;
188
0
    }
189
190
    /* find ':' */
191
636k
    if (colon_pos == UINT_MAX) {
192
27.2M
      for (i = startpos; i < parse_size; i++) {
193
27.2M
        /* quick reject: ':', '\n' and NUL all compare <= ':' */
        if (msg[i] > ':')
194
13.7M
          continue;
195
196
13.4M
        if (msg[i] == ':' && !ctx->skip_line) {
197
62.6k
          colon_pos = i;
198
62.6k
          line->full_value_offset =
199
62.6k
            ctx->input->v_offset + i + 1;
200
62.6k
          break;
201
62.6k
        }
202
13.3M
        if (msg[i] == '\n') {
203
          /* end of headers, or error */
204
545k
          break;
205
545k
        }
206
207
12.8M
        if (msg[i] == '\0')
208
9.66M
          ctx->has_nuls = TRUE;
209
12.8M
      }
210
608k
    } else {
211
27.5k
      i = startpos;
212
27.5k
    }
213
214
    /* find '\n' */
215
82.3M
    for (; i < parse_size; i++) {
216
82.3M
      if (msg[i] <= '\n') {
217
25.5M
        if (msg[i] == '\n')
218
628k
          break;
219
24.9M
        if (msg[i] == '\0')
220
24.8M
          ctx->has_nuls = TRUE;
221
24.9M
      }
222
82.3M
    }
223
224
636k
    if (i < parse_size && i+1 == size && ret == -2) {
225
      /* we don't know if the line continues. */
226
0
      i++;
227
636k
    } else if (i < parse_size) {
228
      /* got a line */
229
628k
      /* NOTE(review): nothing in this function sets
         ctx->skip_line to TRUE; it is only cleared below */
      if (ctx->skip_line) {
230
        /* skipping a line with a huge header name */
231
0
        if (ctx->hdr_size != NULL) {
232
0
          ctx->hdr_size->lines++;
233
0
          ctx->hdr_size->physical_size += i + 1;
234
0
          ctx->hdr_size->virtual_size += i + 1;
235
0
        }
236
0
        if (i == 0 || msg[i-1] != '\r') {
237
          /* missing CR */
238
0
          if (ctx->hdr_size != NULL)
239
0
            ctx->hdr_size->virtual_size++;
240
0
        }
241
242
0
        i_stream_skip(ctx->input, i + 1);
243
0
        startpos = 0;
244
0
        ctx->skip_line = FALSE;
245
0
        continue;
246
0
      }
247
628k
      /* an LWSP byte right after the newline means the next
         physical line continues this header */
      continues = i+1 < size && IS_LWSP(msg[i+1]);
248
249
628k
      if (ctx->hdr_size != NULL)
250
628k
        ctx->hdr_size->lines++;
251
628k
      /* from here on size is the line length without its
         newline (and without the CR, if there was one) */
      if (i == 0 || msg[i-1] != '\r') {
252
        /* missing CR */
253
623k
        if (ctx->hdr_size != NULL)
254
623k
          ctx->hdr_size->virtual_size++;
255
623k
        size = i;
256
623k
      } else {
257
5.57k
        size = i-1;
258
5.57k
        crlf_newline = TRUE;
259
5.57k
      }
260
261
628k
      skip = i+1;
262
628k
      break;
263
628k
    }
264
265
7.30k
    startpos = i;
266
7.30k
  }
267
268
632k
  /* capture how the previous line ended before line->crlf_newline
     and line->no_newline are overwritten below */
  last_crlf = line->crlf_newline &&
269
632k
    (ctx->flags & MESSAGE_HEADER_PARSER_FLAG_DROP_CR) == 0;
270
632k
  last_no_newline = line->no_newline ||
271
632k
    (ctx->flags & MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE) != 0;
272
273
632k
  line->continues = continues;
274
632k
  line->continued = continued;
275
632k
  line->crlf_newline = crlf_newline;
276
632k
  line->no_newline = no_newline;
277
632k
  if (size == 0 && !continued) {
278
    /* end of headers */
279
32.5k
    line->eoh = TRUE;
280
32.5k
    line->name_len = line->value_len = line->full_value_len = 0;
281
32.5k
    line->name = ""; line->value = line->full_value = NULL;
282
32.5k
    line->middle = NULL; line->middle_len = 0;
283
32.5k
    line->full_value_offset = line->name_offset;
284
32.5k
    line->continues = FALSE;
285
599k
  } else if (line->continued) {
286
24.4k
    line->value = msg;
287
24.4k
    line->value_len = size;
288
575k
  } else if (colon_pos == UINT_MAX) {
289
    /* missing ':', assume the whole line is value */
290
512k
    line->value = msg;
291
512k
    line->value_len = size;
292
512k
    line->full_value_offset = line->name_offset;
293
294
512k
    line->name = "";
295
512k
    line->name_len = 0;
296
297
512k
    line->middle = uchar_empty_ptr;
298
512k
    line->middle_len = 0;
299
512k
  } else {
300
62.6k
    size_t pos;
301
302
62.6k
    line->value = msg + colon_pos+1;
303
62.6k
    line->value_len = size - colon_pos - 1;
304
62.6k
    if ((ctx->flags & MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP) != 0) {
305
      /* get value. skip all LWSP after ':'. Note that
306
         RFC2822 doesn't say we should, but history behind
307
         it..
308
309
         Exception to this is if the value consists only of
310
         LWSP, then skip only the one LWSP after ':'. */
311
0
      for (pos = 0; pos < line->value_len; pos++) {
312
0
        if (!IS_LWSP(line->value[pos]))
313
0
          break;
314
0
      }
315
316
0
      if (pos == line->value_len) {
317
        /* everything was LWSP */
318
0
        if (line->value_len > 0 &&
319
0
            IS_LWSP(line->value[0]))
320
0
          pos = 1;
321
0
      }
322
62.6k
    } else {
323
62.6k
      /* skip at most one LWSP byte after ':' */
      pos = line->value_len > 0 &&
324
62.6k
        IS_LWSP(line->value[0]) ? 1 : 0;
325
62.6k
    }
326
327
62.6k
    line->value += pos;
328
62.6k
    line->value_len -= pos;
329
62.6k
    line->full_value_offset += pos;
330
331
    /* get name, skip LWSP before ':' */
332
63.4k
    while (colon_pos > 0 && IS_LWSP(msg[colon_pos-1]))
333
782
      colon_pos--;
334
335
    /* Treat overlong header names as if the full header line was
336
       a value. Callers can usually handle large values better than
337
       large names. */
338
62.6k
    if (colon_pos > MESSAGE_HEADER_NAME_MAX_LEN) {
339
301
      line->name = "";
340
301
      line->name_len = 0;
341
301
      line->middle = uchar_empty_ptr;
342
301
      line->middle_len = 0;
343
301
      line->value = msg;
344
301
      line->value_len = size;
345
301
      line->full_value_offset = line->name_offset;
346
62.3k
    } else {
347
62.3k
      str_truncate(ctx->name, 0);
348
      /* use buffer_append() so the name won't be truncated if there
349
         are NULs. */
350
62.3k
      buffer_append(ctx->name, msg, colon_pos);
351
62.3k
      str_append_c(ctx->name, '\0');
352
353
      /* keep middle stored also in ctx->name so it's available
354
         with use_full_value */
355
62.3k
      line->middle = msg + colon_pos;
356
62.3k
      line->middle_len = (size_t)(line->value - line->middle);
357
62.3k
      str_append_data(ctx->name, line->middle, line->middle_len);
358
359
62.3k
      line->name = str_c(ctx->name);
360
62.3k
      line->name_len = colon_pos;
361
62.3k
      /* re-point middle at the copy in ctx->name, just past the
         name's terminating NUL */
      line->middle = str_data(ctx->name) + line->name_len + 1;
362
62.3k
    }
363
62.6k
  }
364
365
632k
  /* cap the value by the configured limit and by what is still left
     of the limit after the bytes already counted for this block */
  line->value_len = I_MIN(line->value_len, ctx->header_block_max_size);
366
632k
  size_t line_value_size = line->value_len;
367
632k
  size_t header_total_used = ctx->header_block_total_size + ctx->value_buf->used;
368
632k
  size_t line_available = ctx->header_block_max_size <= header_total_used ? 0 :
369
632k
        ctx->header_block_max_size - header_total_used;
370
632k
  line_value_size = I_MIN(line_value_size, line_available);
371
372
632k
  if (!line->continued) {
373
    /* first header line. make a copy of the line since we can't
374
       really trust input stream not to lose it. */
375
608k
    buffer_append(ctx->value_buf, line->value, line_value_size);
376
608k
    line->value = line->full_value = ctx->value_buf->data;
377
608k
    line->full_value_len = line->value_len = line_value_size;
378
608k
  } else if (line->use_full_value) {
379
    /* continue saving the full value. */
380
10.0k
    if (last_no_newline) {
381
      /* line is longer than fit into our buffer, so we
382
         were forced to break it into multiple
383
         message_header_lines */
384
10.0k
    } else if (line_value_size > 1) {
385
9.23k
      /* put back the newline that ended the previous line.
         NOTE(review): the reason for the > 1 / > 2 thresholds
         isn't visible here */
      if (last_crlf && line_value_size > 2)
386
438
        buffer_append_c(ctx->value_buf, '\r');
387
9.23k
      buffer_append_c(ctx->value_buf, '\n');
388
9.23k
    }
389
10.0k
    /* with CLEAN_ONELINE a leading non-space LWSP byte is stored
       as a single space */
    if ((ctx->flags & MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE) != 0 &&
390
10.0k
        line->value_len > 0 && line->value[0] != ' ' &&
391
10.0k
        IS_LWSP(line->value[0]) &&
392
10.0k
        line_value_size > 0) {
393
0
      buffer_append_c(ctx->value_buf, ' ');
394
0
      buffer_append(ctx->value_buf, line->value + 1, line_value_size - 1);
395
0
    } else
396
10.0k
      buffer_append(ctx->value_buf, line->value, line_value_size);
397
398
10.0k
    line->full_value = ctx->value_buf->data;
399
10.0k
    line->full_value_len = ctx->value_buf->used;
400
14.4k
  } else {
401
    /* we didn't want full_value, and this is a continued line. */
402
14.4k
    line->full_value = NULL;
403
14.4k
    line->full_value_len = 0;
404
14.4k
  }
405
406
  /* always reset it */
407
632k
  line->use_full_value = FALSE;
408
409
632k
  if (ctx->hdr_size != NULL) {
410
632k
    ctx->hdr_size->physical_size += skip;
411
632k
    ctx->hdr_size->virtual_size += skip;
412
632k
  }
413
632k
  /* consume the parsed bytes. NOTE(review): for continued lines
     line->value still points at msg (presumably the stream's own
     buffer) - confirm how long that stays valid after the skip */
  i_stream_skip(ctx->input, skip);
414
415
632k
  *hdr_r = line;
416
632k
  return 1;
417
636k
}
418
419
bool message_parse_header_has_nuls(const struct message_header_parser_ctx *ctx)
420
244k
{
421
244k
  return ctx->has_nuls;
422
244k
}
423
424
#undef message_parse_header
425
void message_parse_header(struct istream *input, struct message_size *hdr_size,
426
        enum message_header_parser_flags flags,
427
        message_header_callback_t *callback, void *context)
428
0
{
429
0
  struct message_header_parser_ctx *hdr_ctx;
430
0
  struct message_header_line *hdr;
431
0
  int ret;
432
433
0
  hdr_ctx = message_parse_header_init(input, hdr_size, flags);
434
0
  while ((ret = message_parse_header_next(hdr_ctx, &hdr)) > 0) T_BEGIN {
435
0
    callback(hdr, context);
436
0
  } T_END;
437
0
  i_assert(ret != 0);
438
0
  message_parse_header_deinit(&hdr_ctx);
439
440
  /* call after the final skipping */
441
0
  T_BEGIN {
442
0
    callback(NULL, context);
443
0
  } T_END;
444
0
}
445
446
/* Append hdr to output in its textual form: name and middle bytes (only
   when this isn't a continued line), then the value, then the newline
   unless hdr->no_newline is set. The newline is CRLF when
   hdr->crlf_newline is set and a bare LF otherwise. */
void message_header_line_write(buffer_t *output,
             const struct message_header_line *hdr)
{
  const bool first_line = !hdr->continued;

  if (first_line) {
    /* name length comes from strlen(), so a name is written only up to
       its first NUL */
    buffer_append(output, hdr->name, strlen(hdr->name));
    buffer_append(output, hdr->middle, hdr->middle_len);
  }
  buffer_append(output, hdr->value, hdr->value_len);

  if (hdr->no_newline)
    return;

  if (hdr->crlf_newline)
    buffer_append_c(output, '\r');
  buffer_append_c(output, '\n');
}
460
461
const char *
462
message_header_strdup(pool_t pool, const unsigned char *data, size_t size)
463
0
{
464
0
  i_assert(data != NULL);
465
466
0
  if (memchr(data, '\0', size) == NULL) {
467
    /* fast path */
468
0
    char *dest = p_malloc(pool, size+1);
469
0
    memcpy(dest, data, size);
470
0
    return dest;
471
0
  }
472
473
  /* slow path - this could be made faster, but it should be
474
     rare so keep it simple */
475
0
  string_t *str = str_new(pool, size+2);
476
0
  for (size_t i = 0; i < size; i++) {
477
0
    if (data[i] != '\0')
478
0
      str_append_c(str, data[i]);
479
0
    else
480
0
      str_append(str, UNICODE_REPLACEMENT_CHAR_UTF8);
481
0
  }
482
0
  return str_c(str);
483
0
}
484
485
/* Check that every byte of name is an RFC 5322 "ftext" character:

     field-name      =   1*ftext

     ftext           =   %d33-57 /          ; Printable US-ASCII
                         %d59-126           ;  characters not including
                                            ;  ":".

   Returns true when no other byte is found before the terminating NUL.
   An empty name is accepted too, because there is no byte to reject. */
bool message_header_name_is_valid(const char *name)
{
  const unsigned char *p = (const unsigned char *)name;

  /* walk over the valid bytes; the terminating NUL (0) is below 33 and
     therefore ends the scan as well */
  while (*p >= 33 && *p <= 126 && *p != ':')
    p++;

  /* valid only if the scan stopped at the end of the string */
  return *p == '\0';
}