Coverage Report

Created: 2026-01-25 07:08

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/dovecot/src/lib-mail/message-decoder.c
Line
Count
Source
1
/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
2
3
#include "lib.h"
4
#include "buffer.h"
5
#include "base64.h"
6
#include "str.h"
7
#include "unichar.h"
8
#include "charset-utf8.h"
9
#include "qp-decoder.h"
10
#include "rfc822-parser.h"
11
#include "rfc2231-parser.h"
12
#include "message-parser.h"
13
#include "message-header-decode.h"
14
#include "message-decoder.h"
15
16
struct message_decoder_context {
17
  enum message_decoder_flags flags;
18
  normalizer_func_t *normalizer;
19
  struct message_part *prev_part;
20
21
  struct message_header_line hdr;
22
  buffer_t *buf, *buf2;
23
24
  char *charset_trans_charset;
25
  struct charset_translation *charset_trans;
26
  char translation_buf[CHARSET_MAX_PENDING_BUF_SIZE];
27
  size_t translation_size;
28
29
  struct qp_decoder *qp;
30
  struct base64_decoder base64_decoder;
31
32
  char *content_type, *content_charset;
33
  enum message_cte message_cte;
34
35
  bool binary_input:1;
36
};
37
38
static void
39
message_decode_body_init_charset(struct message_decoder_context *ctx,
40
         struct message_part *part);
41
42
struct message_decoder_context *
43
message_decoder_init(normalizer_func_t *normalizer,
44
         enum message_decoder_flags flags)
45
8.45k
{
46
8.45k
  struct message_decoder_context *ctx;
47
48
8.45k
  ctx = i_new(struct message_decoder_context, 1);
49
8.45k
  ctx->flags = flags;
50
8.45k
  ctx->normalizer = normalizer;
51
8.45k
  ctx->buf = buffer_create_dynamic(default_pool, 8192);
52
8.45k
  ctx->buf2 = buffer_create_dynamic(default_pool, 8192);
53
8.45k
  base64_decode_init(&ctx->base64_decoder, &base64_scheme, 0);
54
8.45k
  return ctx;
55
8.45k
}
56
57
void message_decoder_deinit(struct message_decoder_context **_ctx)
58
8.45k
{
59
8.45k
  struct message_decoder_context *ctx = *_ctx;
60
61
8.45k
  *_ctx = NULL;
62
63
8.45k
  if (ctx->charset_trans != NULL)
64
8.45k
    charset_to_utf8_end(&ctx->charset_trans);
65
8.45k
  if (ctx->qp != NULL)
66
609
    qp_decoder_deinit(&ctx->qp);
67
68
8.45k
  buffer_free(&ctx->buf);
69
8.45k
  buffer_free(&ctx->buf2);
70
8.45k
  i_free(ctx->charset_trans_charset);
71
8.45k
  i_free(ctx->content_type);
72
8.45k
  i_free(ctx->content_charset);
73
8.45k
  i_free(ctx);
74
8.45k
}
75
76
void message_decoder_set_return_binary(struct message_decoder_context *ctx,
77
               bool set)
78
0
{
79
0
  if (set)
80
0
    ctx->flags |= MESSAGE_DECODER_FLAG_RETURN_BINARY;
81
0
  else
82
0
    ctx->flags &= ENUM_NEGATE(MESSAGE_DECODER_FLAG_RETURN_BINARY);
83
0
  message_decode_body_init_charset(ctx, ctx->prev_part);
84
0
}
85
86
enum message_cte message_decoder_parse_cte(const struct message_header_line *hdr)
87
13.5k
{
88
13.5k
  struct rfc822_parser_context parser;
89
13.5k
  enum message_cte message_cte;
90
13.5k
  string_t *value;
91
92
13.5k
  value = t_str_new(64);
93
13.5k
  rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
94
95
13.5k
  rfc822_skip_lwsp(&parser);
96
97
  /* Ensure we do not accidentically accept confused values like
98
     'base64 binary' or embedded NULs */
99
13.5k
  if (rfc822_parse_mime_token(&parser, value) == 1) {
100
2.22k
    rfc822_skip_lwsp(&parser);
101
    /* RFC 2045 does not permit parameters for CTE,
102
       but in case someone uses them, we accept
103
       parameter separator ';' to be lenient. */
104
2.22k
    if (*parser.data != ';')
105
943
      return MESSAGE_CTE_UNKNOWN;
106
2.22k
  }
107
108
12.5k
  message_cte = MESSAGE_CTE_UNKNOWN;
109
12.5k
  switch (str_len(value)) {
110
490
  case 4:
111
490
    if (i_memcasecmp(str_data(value), "7bit", 4) == 0 ||
112
490
        i_memcasecmp(str_data(value), "8bit", 4) == 0)
113
0
      message_cte = MESSAGE_CTE_78BIT;
114
490
    break;
115
3.53k
  case 6:
116
3.53k
    if (i_memcasecmp(str_data(value), "base64", 6) == 0)
117
3.02k
      message_cte = MESSAGE_CTE_BASE64;
118
513
    else if (i_memcasecmp(str_data(value), "binary", 6) == 0)
119
0
      message_cte = MESSAGE_CTE_BINARY;
120
3.53k
    break;
121
5.06k
  case 16:
122
5.06k
    if (i_memcasecmp(str_data(value), "quoted-printable", 16) == 0)
123
4.62k
      message_cte = MESSAGE_CTE_QP;
124
5.06k
    break;
125
12.5k
  }
126
12.5k
  rfc822_parser_deinit(&parser);
127
12.5k
  return message_cte;
128
12.5k
}
129
130
static void
131
parse_content_type(struct message_decoder_context *ctx,
132
       struct message_header_line *hdr)
133
39.4k
{
134
39.4k
  struct rfc822_parser_context parser;
135
39.4k
  const char *const *results;
136
39.4k
  string_t *str;
137
39.4k
  int ret;
138
139
39.4k
  if (ctx->content_type != NULL)
140
1.11k
    return;
141
142
38.3k
  rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
143
38.3k
  rfc822_skip_lwsp(&parser);
144
38.3k
  str = t_str_new(64);
145
38.3k
  ret = rfc822_parse_content_type(&parser, str);
146
38.3k
  ctx->content_type = i_strdup(str_c(str));
147
38.3k
  if (ret < 0) {
148
8.70k
    rfc822_parser_deinit(&parser);
149
8.70k
    return;
150
8.70k
  }
151
152
29.6k
  rfc2231_parse(&parser, &results);
153
69.5k
  for (; *results != NULL; results += 2) {
154
43.0k
    if (strcasecmp(results[0], "charset") == 0) {
155
3.17k
      ctx->content_charset = i_strdup(results[1]);
156
3.17k
      break;
157
3.17k
    }
158
43.0k
  }
159
29.6k
  rfc822_parser_deinit(&parser);
160
29.6k
}
161
162
static bool message_decode_header(struct message_decoder_context *ctx,
163
          struct message_header_line *hdr,
164
          struct message_block *output)
165
310k
{
166
310k
  size_t value_len;
167
168
310k
  if (hdr->continues) {
169
27.2k
    hdr->use_full_value = TRUE;
170
27.2k
    return FALSE;
171
27.2k
  }
172
173
310k
  T_BEGIN {
174
283k
    if (hdr->name_len == 12 &&
175
41.1k
        strcasecmp(hdr->name, "Content-Type") == 0)
176
39.4k
      parse_content_type(ctx, hdr);
177
283k
    if (hdr->name_len == 25 &&
178
14.2k
        strcasecmp(hdr->name, "Content-Transfer-Encoding") == 0)
179
13.5k
      ctx->message_cte = message_decoder_parse_cte(hdr);
180
283k
  } T_END;
181
182
283k
  buffer_set_used_size(ctx->buf, 0);
183
283k
  message_header_decode_utf8(hdr->full_value, hdr->full_value_len,
184
283k
           ctx->buf, ctx->normalizer);
185
283k
  value_len = ctx->buf->used;
186
187
283k
  if (ctx->normalizer != NULL) {
188
283k
    (void)ctx->normalizer(hdr->name, hdr->name_len, ctx->buf);
189
283k
    buffer_append_c(ctx->buf, '\0');
190
283k
  } else {
191
0
    if (!uni_utf8_get_valid_data((const unsigned char *)hdr->name,
192
0
               hdr->name_len, ctx->buf))
193
0
      buffer_append_c(ctx->buf, '\0');
194
0
  }
195
196
283k
  ctx->hdr = *hdr;
197
283k
  ctx->hdr.full_value = ctx->buf->data;
198
283k
  ctx->hdr.full_value_len = value_len;
199
283k
  ctx->hdr.value_len = 0;
200
283k
  if (ctx->buf->used != value_len) {
201
283k
    ctx->hdr.name = CONST_PTR_OFFSET(ctx->buf->data,
202
283k
             ctx->hdr.full_value_len);
203
283k
    ctx->hdr.name_len = ctx->buf->used - 1 - value_len;
204
283k
  }
205
206
283k
  output->hdr = &ctx->hdr;
207
283k
  return TRUE;
208
283k
}
209
210
static void translation_buf_decode(struct message_decoder_context *ctx,
211
           const unsigned char **data, size_t *size)
212
3.32k
{
213
3.32k
  unsigned char trans_buf[CHARSET_MAX_PENDING_BUF_SIZE+1];
214
3.32k
  size_t data_wanted, skip;
215
3.32k
  size_t trans_size, orig_size;
216
217
  /* @UNSAFE: move the previously untranslated bytes to trans_buf
218
     and see if we have now enough data to get the next character
219
     translated */
220
3.32k
  memcpy(trans_buf, ctx->translation_buf, ctx->translation_size);
221
3.32k
  data_wanted = sizeof(trans_buf) - ctx->translation_size;
222
3.32k
  if (data_wanted > *size)
223
2.30k
    data_wanted = *size;
224
3.32k
  memcpy(trans_buf + ctx->translation_size, *data, data_wanted);
225
226
3.32k
  orig_size = trans_size = ctx->translation_size + data_wanted;
227
3.32k
  (void)charset_to_utf8(ctx->charset_trans, trans_buf,
228
3.32k
            &trans_size, ctx->buf2);
229
230
3.32k
  if (trans_size <= ctx->translation_size) {
231
    /* need more data to finish the translation. */
232
666
    i_assert(orig_size < CHARSET_MAX_PENDING_BUF_SIZE);
233
666
    memcpy(ctx->translation_buf, trans_buf, orig_size);
234
666
    ctx->translation_size = orig_size;
235
666
    *data += *size;
236
666
    *size = 0;
237
666
    return;
238
666
  }
239
2.65k
  skip = trans_size - ctx->translation_size;
240
241
2.65k
  i_assert(*size >= skip);
242
2.65k
  *data += skip;
243
2.65k
  *size -= skip;
244
245
2.65k
  ctx->translation_size = 0;
246
2.65k
}
247
248
static void
249
message_decode_body_init_charset(struct message_decoder_context *ctx,
250
         struct message_part *part)
251
265k
{
252
265k
  ctx->binary_input = ctx->content_charset == NULL &&
253
262k
    (ctx->flags & MESSAGE_DECODER_FLAG_RETURN_BINARY) != 0 &&
254
0
    (part->flags & (MESSAGE_PART_FLAG_TEXT |
255
0
        MESSAGE_PART_FLAG_MESSAGE_RFC822)) == 0;
256
257
265k
  if (ctx->binary_input)
258
0
    return;
259
260
265k
  if (ctx->charset_trans != NULL && ctx->content_charset != NULL &&
261
3.00k
      strcasecmp(ctx->content_charset, ctx->charset_trans_charset) == 0) {
262
    /* already have the correct translation selected */
263
1.00k
    charset_to_utf8_reset(ctx->charset_trans);
264
1.00k
    return;
265
1.00k
  }
266
267
264k
  if (ctx->charset_trans != NULL)
268
256k
    charset_to_utf8_end(&ctx->charset_trans);
269
264k
  i_free_and_null(ctx->charset_trans_charset);
270
271
264k
  ctx->charset_trans_charset = i_strdup(ctx->content_charset != NULL ?
272
262k
                ctx->content_charset : "UTF-8");
273
264k
  if (charset_to_utf8_begin(ctx->charset_trans_charset, ctx->normalizer,
274
264k
          &ctx->charset_trans) < 0)
275
1.51k
    ctx->charset_trans = charset_utf8_to_utf8_begin(ctx->normalizer);
276
264k
}
277
278
static bool message_decode_body(struct message_decoder_context *ctx,
279
        struct message_block *input,
280
        struct message_block *output)
281
26.1k
{
282
26.1k
  const unsigned char *data = NULL;
283
26.1k
  size_t pos, size = 0;
284
26.1k
  const char *error;
285
286
26.1k
  switch (ctx->message_cte) {
287
3.69k
  case MESSAGE_CTE_UNKNOWN:
288
    /* just skip this body */
289
3.69k
    return FALSE;
290
291
13.5k
  case MESSAGE_CTE_78BIT:
292
13.5k
  case MESSAGE_CTE_BINARY:
293
13.5k
    data = input->data;
294
13.5k
    size = input->size;
295
13.5k
    break;
296
4.02k
  case MESSAGE_CTE_QP: {
297
4.02k
    buffer_set_used_size(ctx->buf, 0);
298
4.02k
    if (ctx->qp == NULL)
299
609
      ctx->qp = qp_decoder_init(ctx->buf);
300
4.02k
    (void)qp_decoder_more(ctx->qp, input->data, input->size,
301
4.02k
              &pos, &error);
302
4.02k
    data = ctx->buf->data;
303
4.02k
    size = ctx->buf->used;
304
4.02k
    break;
305
13.5k
  }
306
4.87k
  case MESSAGE_CTE_BASE64:
307
4.87k
    buffer_set_used_size(ctx->buf, 0);
308
4.87k
    if (!base64_decode_is_finished(&ctx->base64_decoder)) {
309
4.34k
      if (base64_decode_more(&ctx->base64_decoder,
310
4.34k
                 input->data, input->size,
311
4.34k
                 &pos, ctx->buf) <= 0) {
312
        /* ignore the rest of the input in this
313
           MIME part */
314
1.62k
        (void)base64_decode_finish(&ctx->base64_decoder);
315
1.62k
      }
316
4.34k
    }
317
4.87k
    data = ctx->buf->data;
318
4.87k
    size = ctx->buf->used;
319
4.87k
    break;
320
26.1k
  }
321
322
22.4k
  if (ctx->binary_input) {
323
0
    output->data = data;
324
0
    output->size = size;
325
22.4k
  } else {
326
22.4k
    buffer_set_used_size(ctx->buf2, 0);
327
22.4k
    if (ctx->translation_size != 0)
328
3.32k
      translation_buf_decode(ctx, &data, &size);
329
330
22.4k
    pos = size;
331
22.4k
    (void)charset_to_utf8(ctx->charset_trans,
332
22.4k
              data, &pos, ctx->buf2);
333
22.4k
    if (pos != size) {
334
2.85k
      ctx->translation_size = size - pos;
335
2.85k
      i_assert(ctx->translation_size <=
336
2.85k
         sizeof(ctx->translation_buf));
337
2.85k
      memcpy(ctx->translation_buf, data + pos,
338
2.85k
             ctx->translation_size);
339
2.85k
    }
340
22.4k
    output->data = ctx->buf2->data;
341
22.4k
    output->size = ctx->buf2->used;
342
22.4k
  }
343
344
22.4k
  output->hdr = NULL;
345
22.4k
  return TRUE;
346
22.4k
}
347
348
bool message_decoder_decode_next_block(struct message_decoder_context *ctx,
349
               struct message_block *input,
350
               struct message_block *output)
351
602k
{
352
602k
  if (input->part != ctx->prev_part) {
353
    /* MIME part changed. */
354
266k
    message_decoder_decode_reset(ctx);
355
266k
  }
356
357
602k
  output->part = input->part;
358
602k
  ctx->prev_part = input->part;
359
360
602k
  if (input->hdr != NULL) {
361
310k
    output->size = 0;
362
310k
    return message_decode_header(ctx, input->hdr, output);
363
310k
  } else if (input->size != 0)
364
26.1k
    return message_decode_body(ctx, input, output);
365
265k
  else {
366
265k
    output->hdr = NULL;
367
265k
    output->size = 0;
368
265k
    message_decode_body_init_charset(ctx, input->part);
369
265k
    return TRUE;
370
265k
  }
371
602k
}
372
373
const char *
374
message_decoder_current_content_type(struct message_decoder_context *ctx)
375
0
{
376
0
  return ctx->content_type;
377
0
}
378
379
void message_decoder_decode_reset(struct message_decoder_context *ctx)
380
266k
{
381
266k
  const char *error;
382
383
266k
  base64_decode_reset(&ctx->base64_decoder);
384
385
266k
  if (ctx->qp != NULL)
386
25.6k
    (void)qp_decoder_finish(ctx->qp, &error);
387
266k
  i_free_and_null(ctx->content_type);
388
266k
  i_free_and_null(ctx->content_charset);
389
266k
  ctx->message_cte = MESSAGE_CTE_78BIT;
390
266k
}