/src/dovecot/src/lib-mail/message-parser.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */ |
2 | | |
3 | | #include "lib.h" |
4 | | #include "array.h" |
5 | | #include "str.h" |
6 | | #include "istream.h" |
7 | | #include "rfc822-parser.h" |
8 | | #include "rfc2231-parser.h" |
9 | | #include "message-parser-private.h" |
10 | | |
11 | | message_part_header_callback_t *null_message_part_header_callback = NULL; |
12 | | |
13 | | static int parse_next_header_init(struct message_parser_ctx *ctx, |
14 | | struct message_block *block_r); |
15 | | static int parse_next_body_to_boundary(struct message_parser_ctx *ctx, |
16 | | struct message_block *block_r); |
17 | | static int parse_next_body_to_eof(struct message_parser_ctx *ctx, |
18 | | struct message_block *block_r); |
19 | | |
20 | | static struct message_boundary * |
21 | | boundary_find(struct message_boundary *boundaries, |
22 | | const unsigned char *data, size_t len, bool trailing_dashes) |
23 | 523k | { |
24 | 523k | struct message_boundary *best = NULL; |
25 | | |
26 | | /* As MIME spec says: search from latest one to oldest one so that we |
27 | | don't break if the same boundary is used in nested parts. Also the |
28 | | full message line doesn't have to match the boundary, only the |
29 | | beginning. However, if there are multiple prefixes whose beginning |
30 | | matches, use the longest matching one. */ |
31 | 1.48M | while (boundaries != NULL) { |
32 | 1.42M | if (boundaries->len <= len && |
33 | 1.42M | memcmp(boundaries->boundary, data, boundaries->len) == 0 && |
34 | 1.42M | (best == NULL || best->len < boundaries->len)) { |
35 | 517k | best = boundaries; |
36 | | /* If we see "foo--", it could either mean that there |
37 | | is a boundary named "foo" that ends now or there's |
38 | | a boundary "foo--" which continues. */ |
39 | 517k | if (best->len == len || |
40 | 517k | (best->len == len-2 && trailing_dashes)) { |
41 | | /* This is exactly the wanted boundary. There |
42 | | can't be a better one. */ |
43 | 468k | break; |
44 | 468k | } |
45 | 517k | } |
46 | | |
47 | 956k | boundaries = boundaries->next; |
48 | 956k | } |
49 | | |
50 | 523k | return best; |
51 | 523k | } |
52 | | |
53 | | static void parse_body_add_block(struct message_parser_ctx *ctx, |
54 | | struct message_block *block) |
55 | 450k | { |
56 | 450k | unsigned int missing_cr_count = 0; |
57 | 450k | const unsigned char *cur, *next, *data = block->data; |
58 | | |
59 | 450k | i_assert(block->size > 0); |
60 | | |
61 | 450k | block->hdr = NULL; |
62 | | |
63 | | /* check if we have NULs */ |
64 | 450k | if (memchr(data, '\0', block->size) != NULL) |
65 | 6.98k | ctx->part->flags |= MESSAGE_PART_FLAG_HAS_NULS; |
66 | | |
67 | | /* count number of lines and missing CRs */ |
68 | 450k | if (*data == '\n') { |
69 | 205k | ctx->part->body_size.lines++; |
70 | 205k | if (ctx->last_chr != '\r') |
71 | 205k | missing_cr_count++; |
72 | 205k | } |
73 | | |
74 | 450k | cur = data + 1; |
75 | 639k | while ((next = memchr(cur, '\n', block->size - (cur - data))) != NULL) { |
76 | 188k | ctx->part->body_size.lines++; |
77 | 188k | if (next[-1] != '\r') |
78 | 168k | missing_cr_count++; |
79 | | |
80 | 188k | cur = next + 1; |
81 | 188k | } |
82 | 450k | ctx->last_chr = data[block->size - 1]; |
83 | 450k | ctx->skip += block->size; |
84 | | |
85 | 450k | ctx->part->body_size.physical_size += block->size; |
86 | 450k | ctx->part->body_size.virtual_size += block->size + missing_cr_count; |
87 | 450k | } |
88 | | |
89 | | int message_parser_read_more(struct message_parser_ctx *ctx, |
90 | | struct message_block *block_r, bool *full_r) |
91 | 1.23M | { |
92 | 1.23M | int ret; |
93 | | |
94 | 1.23M | if (ctx->skip > 0) { |
95 | 450k | i_stream_skip(ctx->input, ctx->skip); |
96 | 450k | ctx->skip = 0; |
97 | 450k | } |
98 | | |
99 | 1.23M | *full_r = FALSE; |
100 | 1.23M | ret = i_stream_read_bytes(ctx->input, &block_r->data, |
101 | 1.23M | &block_r->size, ctx->want_count + 1); |
102 | 1.23M | if (ret <= 0) { |
103 | 11.1k | switch (ret) { |
104 | 0 | case 0: |
105 | 0 | if (!ctx->input->eof) { |
106 | 0 | i_assert(!ctx->input->blocking); |
107 | 0 | return 0; |
108 | 0 | } |
109 | 0 | break; |
110 | 11.1k | case -1: |
111 | 11.1k | i_assert(ctx->input->eof || |
112 | 11.1k | ctx->input->stream_errno != 0); |
113 | 11.1k | ctx->eof = TRUE; |
114 | 11.1k | if (block_r->size != 0) { |
115 | | /* EOF, but we still have some data. |
116 | | return it. */ |
117 | 1.76k | return 1; |
118 | 1.76k | } |
119 | 9.39k | return -1; |
120 | 0 | case -2: |
121 | 0 | *full_r = TRUE; |
122 | 0 | break; |
123 | 0 | default: |
124 | 0 | i_unreached(); |
125 | 11.1k | } |
126 | 11.1k | } |
127 | | |
128 | 1.22M | if (!*full_r) { |
129 | | /* reset number of wanted characters if we actually got them */ |
130 | 1.22M | ctx->want_count = 1; |
131 | 1.22M | } |
132 | 1.22M | return 1; |
133 | 1.23M | } |
134 | | |
135 | | static void |
136 | | message_part_append(struct message_parser_ctx *ctx) |
137 | 300k | { |
138 | 300k | struct message_part *parent = ctx->part; |
139 | 300k | struct message_part *part; |
140 | | |
141 | 300k | i_assert(!ctx->preparsed); |
142 | 300k | i_assert(parent != NULL); |
143 | 300k | i_assert((parent->flags & (MESSAGE_PART_FLAG_MULTIPART | |
144 | 300k | MESSAGE_PART_FLAG_MESSAGE_RFC822)) != 0); |
145 | | |
146 | 300k | part = p_new(ctx->part_pool, struct message_part, 1); |
147 | 300k | part->parent = parent; |
148 | | |
149 | | /* set child position */ |
150 | 300k | part->physical_pos = |
151 | 300k | parent->physical_pos + |
152 | 300k | parent->body_size.physical_size + |
153 | 300k | parent->header_size.physical_size; |
154 | | |
155 | | /* add to parent's linked list */ |
156 | 300k | *ctx->next_part = part; |
157 | | /* update the parent's end-of-linked-list pointer */ |
158 | 300k | struct message_part **next_part = &part->next; |
159 | 300k | array_push_back(&ctx->next_part_stack, &next_part); |
160 | | /* This part is now the new parent for the next message_part_append() |
161 | | call. Its linked list begins with the children pointer. */ |
162 | 300k | ctx->next_part = &part->children; |
163 | | |
164 | 300k | ctx->part = part; |
165 | 300k | ctx->nested_parts_count++; |
166 | 300k | ctx->total_parts_count++; |
167 | 300k | i_assert(ctx->nested_parts_count < ctx->max_nested_mime_parts); |
168 | 300k | i_assert(ctx->total_parts_count <= ctx->max_total_mime_parts); |
169 | 300k | } |
170 | | |
171 | | static void message_part_finish(struct message_parser_ctx *ctx) |
172 | 300k | { |
173 | 300k | struct message_part **const *parent_next_partp; |
174 | | |
175 | 300k | if (!ctx->preparsed) { |
176 | 300k | i_assert(ctx->nested_parts_count > 0); |
177 | 300k | ctx->nested_parts_count--; |
178 | | |
179 | 300k | parent_next_partp = array_back(&ctx->next_part_stack); |
180 | 300k | array_pop_back(&ctx->next_part_stack); |
181 | 300k | ctx->next_part = *parent_next_partp; |
182 | 300k | } |
183 | | |
184 | 300k | message_size_add(&ctx->part->parent->body_size, &ctx->part->body_size); |
185 | 300k | message_size_add(&ctx->part->parent->body_size, &ctx->part->header_size); |
186 | 300k | ctx->part->parent->children_count += 1 + ctx->part->children_count; |
187 | 300k | ctx->part = ctx->part->parent; |
188 | 300k | } |
189 | | |
190 | | static void message_boundary_free(struct message_boundary *b) |
191 | 10.1k | { |
192 | 10.1k | i_free(b->boundary); |
193 | 10.1k | i_free(b); |
194 | 10.1k | } |
195 | | |
196 | | static void |
197 | | boundary_remove_until(struct message_parser_ctx *ctx, |
198 | | struct message_boundary *boundary) |
199 | 226k | { |
200 | 236k | while (ctx->boundaries != boundary) { |
201 | 10.1k | struct message_boundary *cur = ctx->boundaries; |
202 | | |
203 | 10.1k | i_assert(cur != NULL); |
204 | 10.1k | ctx->boundaries = cur->next; |
205 | 10.1k | message_boundary_free(cur); |
206 | | |
207 | 10.1k | } |
208 | 226k | ctx->boundaries = boundary; |
209 | 226k | } |
210 | | |
211 | | static void parse_next_body_multipart_init(struct message_parser_ctx *ctx) |
212 | 10.1k | { |
213 | 10.1k | struct message_boundary *b; |
214 | | |
215 | 10.1k | b = i_new(struct message_boundary, 1); |
216 | 10.1k | b->part = ctx->part; |
217 | 10.1k | b->boundary = ctx->last_boundary; |
218 | 10.1k | ctx->last_boundary = NULL; |
219 | 10.1k | b->len = strlen(b->boundary); |
220 | | |
221 | 10.1k | b->next = ctx->boundaries; |
222 | 10.1k | ctx->boundaries = b; |
223 | 10.1k | } |
224 | | |
/* Begin the message embedded in a message/rfc822 part: append a child part
   and start parsing its headers. Returns what parse_next_header_init()
   returns. */
static int parse_next_body_message_rfc822_init(struct message_parser_ctx *ctx,
					       struct message_block *block_r)
{
	int ret;

	message_part_append(ctx);
	ret = parse_next_header_init(ctx, block_r);
	return ret;
}
231 | | |
232 | | static int |
233 | | boundary_line_find(struct message_parser_ctx *ctx, |
234 | | const unsigned char *data, size_t size, bool full, |
235 | | struct message_boundary **boundary_r) |
236 | 926k | { |
237 | 926k | *boundary_r = NULL; |
238 | | |
239 | 926k | if (size < 2) { |
240 | 458 | i_assert(!full); |
241 | | |
242 | 458 | if (ctx->input->eof) |
243 | 327 | return -1; |
244 | 131 | ctx->want_count = 2; |
245 | 131 | return 0; |
246 | 458 | } |
247 | | |
248 | 926k | if (data[0] != '-' || data[1] != '-') { |
249 | | /* not a boundary, just skip this line */ |
250 | 376k | return -1; |
251 | 376k | } |
252 | | |
253 | 549k | if (ctx->total_parts_count >= ctx->max_total_mime_parts) { |
254 | | /* can't add any more MIME parts. just stop trying to find |
255 | | more boundaries. */ |
256 | 25.2k | ctx->part->flags |= MESSAGE_PART_FLAG_OVERFLOW; |
257 | 25.2k | return -1; |
258 | 25.2k | } |
259 | | |
260 | | /* need to find the end of line */ |
261 | 524k | data += 2; |
262 | 524k | size -= 2; |
263 | 524k | const unsigned char *lf_pos = memchr(data, '\n', size); |
264 | 524k | if (lf_pos == NULL && |
265 | 524k | size+2 < BOUNDARY_END_MAX_LEN && |
266 | 524k | !ctx->input->eof && !full) { |
267 | | /* no LF found */ |
268 | 435 | ctx->want_count = BOUNDARY_END_MAX_LEN; |
269 | 435 | return 0; |
270 | 435 | } |
271 | 523k | size_t find_size = size; |
272 | 523k | bool trailing_dashes = FALSE; |
273 | | |
274 | 523k | if (lf_pos != NULL) { |
275 | 522k | find_size = lf_pos - data; |
276 | 522k | if (find_size > 0 && data[find_size-1] == '\r') |
277 | 1.39k | find_size--; |
278 | 522k | if (find_size > 2 && data[find_size-1] == '-' && |
279 | 522k | data[find_size-2] == '-') |
280 | 5.14k | trailing_dashes = TRUE; |
281 | 522k | } else if (find_size > BOUNDARY_END_MAX_LEN) |
282 | 57 | find_size = BOUNDARY_END_MAX_LEN; |
283 | | |
284 | 523k | *boundary_r = boundary_find(ctx->boundaries, data, find_size, |
285 | 523k | trailing_dashes); |
286 | 523k | if (*boundary_r == NULL) |
287 | 7.45k | return -1; |
288 | | |
289 | 516k | (*boundary_r)->epilogue_found = |
290 | 516k | size >= (*boundary_r)->len + 2 && |
291 | 516k | memcmp(data + (*boundary_r)->len, "--", 2) == 0; |
292 | 516k | return 1; |
293 | 523k | } |
294 | | |
295 | | static int parse_next_mime_header_init(struct message_parser_ctx *ctx, |
296 | | struct message_block *block_r) |
297 | 218k | { |
298 | 218k | message_part_append(ctx); |
299 | 218k | ctx->part->flags |= MESSAGE_PART_FLAG_IS_MIME; |
300 | | |
301 | 218k | return parse_next_header_init(ctx, block_r); |
302 | 218k | } |
303 | | |
304 | | static int parse_next_body_skip_boundary_line(struct message_parser_ctx *ctx, |
305 | | struct message_block *block_r) |
306 | 221k | { |
307 | 221k | const unsigned char *ptr; |
308 | 221k | int ret; |
309 | 221k | bool full; |
310 | | |
311 | 221k | if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) |
312 | 274 | return ret; |
313 | | |
314 | 221k | ptr = memchr(block_r->data, '\n', block_r->size); |
315 | 221k | if (ptr == NULL) { |
316 | 153 | parse_body_add_block(ctx, block_r); |
317 | 153 | if (block_r->size > 0 && |
318 | 153 | (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0) |
319 | 0 | return 1; |
320 | 153 | return 0; |
321 | 153 | } |
322 | | |
323 | | /* found the LF */ |
324 | 220k | block_r->size = (ptr - block_r->data) + 1; |
325 | 220k | parse_body_add_block(ctx, block_r); |
326 | | |
327 | 220k | if (ctx->boundaries == NULL || ctx->boundaries->part != ctx->part) { |
328 | | /* epilogue */ |
329 | 2.57k | if (ctx->boundaries != NULL) |
330 | 2.55k | ctx->parse_next_block = parse_next_body_to_boundary; |
331 | 28 | else |
332 | 28 | ctx->parse_next_block = parse_next_body_to_eof; |
333 | 218k | } else { |
334 | | /* a new MIME part begins */ |
335 | 218k | ctx->parse_next_block = parse_next_mime_header_init; |
336 | 218k | } |
337 | 220k | if (block_r->size > 0 && |
338 | 220k | (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0) |
339 | 0 | return 1; |
340 | 220k | return ctx->parse_next_block(ctx, block_r); |
341 | 220k | } |
342 | | |
343 | | static int parse_part_finish(struct message_parser_ctx *ctx, |
344 | | struct message_boundary *boundary, |
345 | | struct message_block *block_r, bool first_line) |
346 | 221k | { |
347 | 221k | size_t line_size; |
348 | 221k | size_t boundary_len = boundary->len; |
349 | 221k | bool boundary_epilogue_found = boundary->epilogue_found; |
350 | | |
351 | 221k | i_assert(ctx->last_boundary == NULL); |
352 | | |
353 | | /* get back to parent MIME part, summing the child MIME part sizes |
354 | | into parent's body sizes */ |
355 | 515k | while (ctx->part != boundary->part) { |
356 | 294k | message_part_finish(ctx); |
357 | 294k | i_assert(ctx->part != NULL); |
358 | 294k | } |
359 | | |
360 | 221k | if (boundary->epilogue_found) { |
361 | | /* this boundary isn't needed anymore */ |
362 | 2.60k | boundary_remove_until(ctx, boundary->next); |
363 | 218k | } else { |
364 | | /* forget about the boundaries we possibly skipped */ |
365 | 218k | boundary_remove_until(ctx, boundary); |
366 | 218k | } |
367 | | |
368 | | /* the boundary itself should already be in buffer. add that. */ |
369 | 221k | block_r->data = i_stream_get_data(ctx->input, &block_r->size); |
370 | 221k | i_assert(block_r->size >= ctx->skip); |
371 | 221k | block_r->data += ctx->skip; |
372 | | /* [[\r]\n]--<boundary>[--] */ |
373 | 221k | if (first_line) |
374 | 211k | line_size = 0; |
375 | 9.91k | else if (block_r->data[0] == '\r') { |
376 | 3.85k | i_assert(block_r->data[1] == '\n'); |
377 | 3.85k | line_size = 2; |
378 | 6.06k | } else { |
379 | 6.06k | i_assert(block_r->data[0] == '\n'); |
380 | 6.06k | line_size = 1; |
381 | 6.06k | } |
382 | 221k | line_size += 2 + boundary_len + (boundary_epilogue_found ? 2 : 0); |
383 | 221k | i_assert(block_r->size >= ctx->skip + line_size); |
384 | 221k | block_r->size = line_size; |
385 | 221k | parse_body_add_block(ctx, block_r); |
386 | | |
387 | 221k | ctx->parse_next_block = parse_next_body_skip_boundary_line; |
388 | | |
389 | 221k | if ((ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0) |
390 | 0 | return 1; |
391 | 221k | return ctx->parse_next_block(ctx, block_r); |
392 | 221k | } |
393 | | |
394 | | static int parse_next_body_to_boundary(struct message_parser_ctx *ctx, |
395 | | struct message_block *block_r) |
396 | 230k | { |
397 | 230k | struct message_boundary *boundary = NULL; |
398 | 230k | const unsigned char *data, *cur, *next, *end; |
399 | 230k | size_t boundary_start; |
400 | 230k | int ret; |
401 | 230k | bool full; |
402 | | |
403 | 230k | if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) |
404 | 1.33k | return ret; |
405 | | |
406 | 229k | data = block_r->data; |
407 | 229k | if (ctx->last_chr == '\n') { |
408 | | /* handle boundary in first line of message. alternatively |
409 | | it's an empty line. */ |
410 | 226k | ret = boundary_line_find(ctx, block_r->data, |
411 | 226k | block_r->size, full, &boundary); |
412 | 226k | if (ret >= 0) { |
413 | 211k | return ret == 0 ? 0 : |
414 | 211k | parse_part_finish(ctx, boundary, block_r, TRUE); |
415 | 211k | } |
416 | 226k | } |
417 | | |
418 | 229k | i_assert(block_r->size > 0); |
419 | 18.3k | boundary_start = 0; |
420 | | |
421 | | /* skip to beginning of the next line. the first line was |
422 | | handled already. */ |
423 | 18.3k | cur = data; end = data + block_r->size; |
424 | 126k | while ((next = memchr(cur, '\n', end - cur)) != NULL) { |
425 | 126k | cur = next + 1; |
426 | | |
427 | 126k | boundary_start = next - data; |
428 | 126k | if (next > data && next[-1] == '\r') |
429 | 22.5k | boundary_start--; |
430 | | |
431 | 126k | if (boundary_start != 0) { |
432 | | /* we can at least skip data until the first [CR]LF. |
433 | | input buffer can't be full anymore. */ |
434 | 113k | full = FALSE; |
435 | 113k | } |
436 | | |
437 | 126k | ret = boundary_line_find(ctx, cur, end - cur, full, &boundary); |
438 | 126k | if (ret >= 0) { |
439 | | /* found / need more data */ |
440 | 17.7k | if (ret == 0 && boundary_start == 0) |
441 | 60 | ctx->want_count += cur - block_r->data; |
442 | 17.7k | break; |
443 | 17.7k | } |
444 | 126k | } |
445 | | |
446 | 18.3k | if (next != NULL) { |
447 | | /* found / need more data */ |
448 | 17.7k | i_assert(ret >= 0); |
449 | 17.7k | i_assert(!(ret == 0 && full)); |
450 | 17.7k | } else if (boundary_start == 0) { |
451 | | /* no linefeeds in this block. we can just skip it. */ |
452 | 421 | ret = 0; |
453 | 421 | if (block_r->data[block_r->size-1] == '\r' && !ctx->eof) { |
454 | | /* this may be the beginning of the \r\n--boundary */ |
455 | 3 | block_r->size--; |
456 | 3 | } |
457 | 421 | boundary_start = block_r->size; |
458 | 421 | } else { |
459 | | /* the boundary wasn't found from this data block, |
460 | | we'll need more data. */ |
461 | 98 | ret = 0; |
462 | 98 | ctx->want_count = (block_r->size - boundary_start) + 1; |
463 | 98 | } |
464 | | |
465 | 18.3k | if (ret > 0 || (ret == 0 && !ctx->eof)) { |
466 | | /* a) we found the boundary |
467 | | b) we need more data and haven't reached EOF yet |
468 | | so leave CR+LF + last line to buffer */ |
469 | 17.8k | block_r->size = boundary_start; |
470 | 17.8k | } |
471 | 18.3k | if (block_r->size != 0) { |
472 | 8.33k | parse_body_add_block(ctx, block_r); |
473 | | |
474 | 8.33k | if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 && |
475 | 8.33k | (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) == 0) |
476 | 0 | return 0; |
477 | | |
478 | 8.33k | return 1; |
479 | 8.33k | } |
480 | 9.97k | return ret <= 0 ? ret : |
481 | 9.97k | parse_part_finish(ctx, boundary, block_r, FALSE); |
482 | 18.3k | } |
483 | | |
484 | | static int parse_next_body_to_eof(struct message_parser_ctx *ctx, |
485 | | struct message_block *block_r) |
486 | 3.51k | { |
487 | 3.51k | bool full; |
488 | 3.51k | int ret; |
489 | | |
490 | 3.51k | if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) |
491 | 3.37k | return ret; |
492 | | |
493 | 143 | parse_body_add_block(ctx, block_r); |
494 | | |
495 | 143 | if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 && |
496 | 143 | (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) == 0) |
497 | 0 | return 0; |
498 | | |
499 | 143 | return 1; |
500 | 143 | } |
501 | | |
502 | | static void parse_content_type(struct message_parser_ctx *ctx, |
503 | | struct message_header_line *hdr) |
504 | 54.7k | { |
505 | 54.7k | struct rfc822_parser_context parser; |
506 | 54.7k | const char *const *results, *suffix; |
507 | 54.7k | string_t *content_type; |
508 | 54.7k | int ret; |
509 | | |
510 | 54.7k | if (ctx->part_seen_content_type) |
511 | 1.10k | return; |
512 | 53.6k | ctx->part_seen_content_type = TRUE; |
513 | | |
514 | 53.6k | rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); |
515 | 53.6k | rfc822_skip_lwsp(&parser); |
516 | | |
517 | 53.6k | content_type = t_str_new(64); |
518 | 53.6k | ret = rfc822_parse_content_type(&parser, content_type); |
519 | | |
520 | 53.6k | if (strcasecmp(str_c(content_type), "message/rfc822") == 0) |
521 | 332 | ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822; |
522 | 53.3k | else if (str_begins_icase(str_c(content_type), "text", &suffix) && |
523 | 53.3k | (suffix[0] == '\0' || suffix[0] == '/')) |
524 | 396 | ctx->part->flags |= MESSAGE_PART_FLAG_TEXT; |
525 | 52.9k | else if (str_begins_icase(str_c(content_type), "multipart/", &suffix)) { |
526 | 39.2k | ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART; |
527 | | |
528 | 39.2k | if (strcasecmp(suffix, "digest") == 0) |
529 | 3.46k | ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART_DIGEST; |
530 | 39.2k | } |
531 | | |
532 | 53.6k | if (ret < 0 || |
533 | 53.6k | (ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 || |
534 | 53.6k | ctx->last_boundary != NULL) { |
535 | 14.4k | rfc822_parser_deinit(&parser); |
536 | 14.4k | return; |
537 | 14.4k | } |
538 | | |
539 | 39.2k | rfc2231_parse(&parser, &results); |
540 | 7.02M | for (; *results != NULL; results += 2) { |
541 | 7.01M | if (strcasecmp(results[0], "boundary") == 0) { |
542 | | /* truncate excessively long boundaries */ |
543 | 22.9k | i_free(ctx->last_boundary); |
544 | 22.9k | ctx->last_boundary = |
545 | 22.9k | i_strndup(results[1], BOUNDARY_STRING_MAX_LEN); |
546 | 22.9k | break; |
547 | 22.9k | } |
548 | 7.01M | } |
549 | 39.2k | rfc822_parser_deinit(&parser); |
550 | 39.2k | } |
551 | | |
552 | | static bool block_is_at_eoh(const struct message_block *block) |
553 | 777k | { |
554 | 777k | if (block->size < 1) |
555 | 0 | return FALSE; |
556 | 777k | if (block->data[0] == '\n') |
557 | 55.0k | return TRUE; |
558 | 721k | if (block->data[0] == '\r') { |
559 | 3.09k | if (block->size < 2) |
560 | 13 | return FALSE; |
561 | 3.07k | if (block->data[1] == '\n') |
562 | 2.02k | return TRUE; |
563 | 3.07k | } |
564 | 719k | return FALSE; |
565 | 721k | } |
566 | | |
567 | | static bool parse_too_many_nested_mime_parts(struct message_parser_ctx *ctx) |
568 | 103k | { |
569 | 103k | return ctx->nested_parts_count+1 >= ctx->max_nested_mime_parts; |
570 | 103k | } |
571 | | |
572 | | #define MUTEX_FLAGS \ |
573 | | (MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_MULTIPART) |
574 | | |
575 | | static int parse_next_header(struct message_parser_ctx *ctx, |
576 | | struct message_block *block_r) |
577 | 781k | { |
578 | 781k | struct message_part *part = ctx->part; |
579 | 781k | struct message_header_line *hdr; |
580 | 781k | struct message_boundary *boundary; |
581 | 781k | bool full; |
582 | 781k | int ret; |
583 | | |
584 | 781k | if ((ret = message_parser_read_more(ctx, block_r, &full)) == 0) |
585 | 0 | return ret; |
586 | | |
587 | 781k | if (ret > 0 && block_is_at_eoh(block_r) && |
588 | 781k | ctx->last_boundary != NULL && |
589 | 781k | (part->flags & MESSAGE_PART_FLAG_IS_MIME) != 0) { |
590 | | /* we are at the end of headers and we've determined that we're |
591 | | going to start a multipart. add the boundary already here |
592 | | at this point so we can reliably determine whether the |
593 | | "\n--boundary" belongs to us or to a previous boundary. |
594 | | this is a problem if the boundary prefixes are identical, |
595 | | because MIME requires only the prefix to match. */ |
596 | 21.6k | if (!parse_too_many_nested_mime_parts(ctx)) { |
597 | 10.1k | parse_next_body_multipart_init(ctx); |
598 | 10.1k | ctx->multipart = TRUE; |
599 | 11.4k | } else { |
600 | 11.4k | part->flags |= MESSAGE_PART_FLAG_OVERFLOW; |
601 | 11.4k | part->flags &= ENUM_NEGATE(MESSAGE_PART_FLAG_MULTIPART); |
602 | 11.4k | } |
603 | 21.6k | } |
604 | | |
605 | | /* before parsing the header see if we can find a --boundary from here. |
606 | | we're guaranteed to be at the beginning of the line here. */ |
607 | 781k | if (ret > 0) { |
608 | 777k | ret = ctx->boundaries == NULL ? -1 : |
609 | 777k | boundary_line_find(ctx, block_r->data, |
610 | 574k | block_r->size, full, &boundary); |
611 | 777k | if (ret > 0 && boundary->part == ctx->part) { |
612 | | /* our own body begins with our own --boundary. |
613 | | we don't want to handle that yet. */ |
614 | 8.35k | ret = -1; |
615 | 8.35k | } |
616 | 777k | } |
617 | 781k | if (ret < 0) { |
618 | | /* no boundary */ |
619 | 501k | ret = message_parse_header_next(ctx->hdr_parser_ctx, &hdr); |
620 | 501k | if (ret == 0 || (ret < 0 && ctx->input->stream_errno != 0)) { |
621 | 0 | ctx->want_count = i_stream_get_data_size(ctx->input) + 1; |
622 | 0 | return ret; |
623 | 0 | } |
624 | 501k | } else if (ret == 0) { |
625 | | /* need more data */ |
626 | 382 | return 0; |
627 | 279k | } else { |
628 | | /* boundary found. stop parsing headers here. The previous |
629 | | [CR]LF belongs to the MIME boundary though. */ |
630 | 279k | if (ctx->prev_hdr_newline_size > 0) { |
631 | 59.5k | i_assert(ctx->part->header_size.lines > 0); |
632 | | /* remove the newline size from the MIME header */ |
633 | 59.5k | ctx->part->header_size.lines--; |
634 | 59.5k | ctx->part->header_size.physical_size -= |
635 | 59.5k | ctx->prev_hdr_newline_size; |
636 | 59.5k | ctx->part->header_size.virtual_size -= 2; |
637 | | /* add the newline size to the parent's body */ |
638 | 59.5k | ctx->part->parent->body_size.lines++; |
639 | 59.5k | ctx->part->parent->body_size.physical_size += |
640 | 59.5k | ctx->prev_hdr_newline_size; |
641 | 59.5k | ctx->part->parent->body_size.virtual_size += 2; |
642 | 59.5k | } |
643 | 279k | hdr = NULL; |
644 | 279k | } |
645 | | |
646 | 780k | if (hdr != NULL) { |
647 | 475k | if (hdr->eoh) |
648 | 51.2k | ; |
649 | 424k | else if (strcasecmp(hdr->name, "Mime-Version") == 0) { |
650 | | /* it's MIME. Content-* headers are valid */ |
651 | 267 | part->flags |= MESSAGE_PART_FLAG_IS_MIME; |
652 | 424k | } else if (strcasecmp(hdr->name, "Content-Type") == 0) { |
653 | 64.9k | if ((ctx->flags & |
654 | 64.9k | MESSAGE_PARSER_FLAG_MIME_VERSION_STRICT) == 0) |
655 | 64.9k | part->flags |= MESSAGE_PART_FLAG_IS_MIME; |
656 | | |
657 | 64.9k | if (hdr->continues) |
658 | 10.1k | hdr->use_full_value = TRUE; |
659 | 109k | else T_BEGIN { |
660 | 109k | parse_content_type(ctx, hdr); |
661 | 109k | } T_END; |
662 | 64.9k | } |
663 | | |
664 | 475k | block_r->hdr = hdr; |
665 | 475k | block_r->size = 0; |
666 | 475k | ctx->prev_hdr_newline_size = hdr->no_newline ? 0 : |
667 | 475k | (hdr->crlf_newline ? 2 : 1); |
668 | 475k | return 1; |
669 | 475k | } |
670 | | |
671 | | /* end of headers */ |
672 | 305k | if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) { |
673 | | /* It's not MIME. Reset everything we found from |
674 | | Content-Type. */ |
675 | 81.2k | i_assert(!ctx->multipart); |
676 | 81.2k | part->flags = 0; |
677 | 81.2k | } |
678 | 305k | i_free(ctx->last_boundary); |
679 | | |
680 | 305k | if (!ctx->part_seen_content_type || |
681 | 305k | (part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) { |
682 | 251k | if (part->parent != NULL && |
683 | 251k | (part->parent->flags & |
684 | 250k | MESSAGE_PART_FLAG_MULTIPART_DIGEST) != 0) { |
685 | | /* when there's no content-type specified and we're |
686 | | below multipart/digest, assume message/rfc822 |
687 | | content-type */ |
688 | 81.8k | part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822; |
689 | 169k | } else { |
690 | | /* otherwise we default to text/plain */ |
691 | 169k | part->flags |= MESSAGE_PART_FLAG_TEXT; |
692 | 169k | } |
693 | 251k | } |
694 | | |
695 | 305k | if (message_parse_header_has_nuls(ctx->hdr_parser_ctx)) |
696 | 15.4k | part->flags |= MESSAGE_PART_FLAG_HAS_NULS; |
697 | 305k | message_parse_header_deinit(&ctx->hdr_parser_ctx); |
698 | | |
699 | 305k | i_assert((part->flags & MUTEX_FLAGS) != MUTEX_FLAGS); |
700 | | |
701 | 305k | ctx->last_chr = '\n'; |
702 | 305k | if (ctx->multipart) { |
703 | 10.1k | i_assert(ctx->last_boundary == NULL); |
704 | 10.1k | ctx->multipart = FALSE; |
705 | 10.1k | ctx->parse_next_block = parse_next_body_to_boundary; |
706 | 295k | } else if ((part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) == 0) { |
707 | | /* Not message/rfc822 */ |
708 | 212k | if (ctx->boundaries != NULL) |
709 | 209k | ctx->parse_next_block = parse_next_body_to_boundary; |
710 | 3.34k | else |
711 | 3.34k | ctx->parse_next_block = parse_next_body_to_eof; |
712 | 212k | } else if (!parse_too_many_nested_mime_parts(ctx) && |
713 | 82.1k | ctx->total_parts_count < ctx->max_total_mime_parts) { |
714 | | /* message/rfc822 - not reached MIME part limits yet */ |
715 | 81.9k | ctx->parse_next_block = parse_next_body_message_rfc822_init; |
716 | 81.9k | } else { |
717 | | /* message/rfc822 - already reached MIME part limits */ |
718 | 205 | part->flags |= MESSAGE_PART_FLAG_OVERFLOW; |
719 | 205 | part->flags &= ENUM_NEGATE(MESSAGE_PART_FLAG_MESSAGE_RFC822); |
720 | 205 | if (ctx->boundaries != NULL) |
721 | 204 | ctx->parse_next_block = parse_next_body_to_boundary; |
722 | 1 | else |
723 | 1 | ctx->parse_next_block = parse_next_body_to_eof; |
724 | 205 | } |
725 | | |
726 | 305k | ctx->want_count = 1; |
727 | | |
728 | | /* return empty block as end of headers */ |
729 | 305k | block_r->hdr = NULL; |
730 | 305k | block_r->size = 0; |
731 | 305k | return 1; |
732 | 305k | } |
733 | | |
734 | | static int parse_next_header_init(struct message_parser_ctx *ctx, |
735 | | struct message_block *block_r) |
736 | 305k | { |
737 | 305k | i_assert(ctx->hdr_parser_ctx == NULL); |
738 | | |
739 | 305k | ctx->hdr_parser_ctx = |
740 | 305k | message_parse_header_init(ctx->input, &ctx->part->header_size, |
741 | 305k | ctx->hdr_flags); |
742 | 305k | ctx->part_seen_content_type = FALSE; |
743 | 305k | ctx->prev_hdr_newline_size = 0; |
744 | | |
745 | 305k | ctx->parse_next_block = parse_next_header; |
746 | 305k | return parse_next_header(ctx, block_r); |
747 | 305k | } |
748 | | |
749 | | struct message_parser_ctx * |
750 | | message_parser_init_int(struct istream *input, |
751 | | const struct message_parser_settings *set) |
752 | 4.98k | { |
753 | 4.98k | struct message_parser_ctx *ctx; |
754 | | |
755 | 4.98k | ctx = i_new(struct message_parser_ctx, 1); |
756 | 4.98k | ctx->hdr_flags = set->hdr_flags; |
757 | 4.98k | ctx->flags = set->flags; |
758 | 4.98k | ctx->max_nested_mime_parts = set->max_nested_mime_parts != 0 ? |
759 | 0 | set->max_nested_mime_parts : |
760 | 4.98k | MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS; |
761 | 4.98k | ctx->max_total_mime_parts = set->max_total_mime_parts != 0 ? |
762 | 0 | set->max_total_mime_parts : |
763 | 4.98k | MESSAGE_PARSER_DEFAULT_MAX_TOTAL_MIME_PARTS; |
764 | 4.98k | ctx->input = input; |
765 | 4.98k | i_stream_ref(input); |
766 | 4.98k | return ctx; |
767 | 4.98k | } |
768 | | |
769 | | struct message_parser_ctx * |
770 | | message_parser_init(pool_t part_pool, struct istream *input, |
771 | | const struct message_parser_settings *set) |
772 | 4.98k | { |
773 | 4.98k | struct message_parser_ctx *ctx; |
774 | | |
775 | 4.98k | ctx = message_parser_init_int(input, set); |
776 | 4.98k | ctx->part_pool = part_pool; |
777 | 4.98k | ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1); |
778 | 4.98k | ctx->next_part = &ctx->part->children; |
779 | 4.98k | ctx->parse_next_block = parse_next_header_init; |
780 | 4.98k | ctx->total_parts_count = 1; |
781 | 4.98k | i_array_init(&ctx->next_part_stack, 4); |
782 | 4.98k | return ctx; |
783 | 4.98k | } |
784 | | |
785 | | void message_parser_deinit(struct message_parser_ctx **_ctx, |
786 | | struct message_part **parts_r) |
787 | 4.98k | { |
788 | 4.98k | const char *error; |
789 | | |
790 | 4.98k | i_assert((**_ctx).preparsed == FALSE); |
791 | 4.98k | if (message_parser_deinit_from_parts(_ctx, parts_r, &error) < 0) |
792 | 0 | i_panic("message_parser_deinit_from_parts: %s", error); |
793 | 4.98k | } |
794 | | |
795 | | int message_parser_deinit_from_parts(struct message_parser_ctx **_ctx, |
796 | | struct message_part **parts_r, |
797 | | const char **error_r) |
798 | 4.98k | { |
799 | 4.98k | struct message_parser_ctx *ctx = *_ctx; |
800 | 4.98k | int ret = ctx->broken_reason != NULL ? -1 : 0; |
801 | | |
802 | 4.98k | *_ctx = NULL; |
803 | 4.98k | *parts_r = ctx->parts; |
804 | 4.98k | *error_r = ctx->broken_reason; |
805 | | |
806 | 4.98k | if (ctx->hdr_parser_ctx != NULL) |
807 | 0 | message_parse_header_deinit(&ctx->hdr_parser_ctx); |
808 | 4.98k | if (ctx->part != NULL) { |
809 | | /* If the whole message has been parsed, the parts are |
810 | | usually finished in message_parser_parse_next_block(). |
811 | | However, it's possible that the caller finishes reading |
812 | | through the istream without calling |
813 | | message_parser_parse_next_block() afterwards. In that case |
814 | | we still need to finish these parts. */ |
815 | 4.98k | while (ctx->part->parent != NULL) |
816 | 0 | message_part_finish(ctx); |
817 | 4.98k | } |
818 | 4.98k | boundary_remove_until(ctx, NULL); |
819 | 4.98k | i_assert(ctx->nested_parts_count == 0); |
820 | | |
821 | 4.98k | i_stream_unref(&ctx->input); |
822 | 4.98k | array_free(&ctx->next_part_stack); |
823 | 4.98k | i_free(ctx->last_boundary); |
824 | 4.98k | i_free(ctx); |
825 | 4.98k | i_assert(ret < 0 || *parts_r != NULL); |
826 | 4.98k | return ret; |
827 | 4.98k | } |
828 | | |
829 | | int message_parser_parse_next_block(struct message_parser_ctx *ctx, |
830 | | struct message_block *block_r) |
831 | 794k | { |
832 | 794k | int ret; |
833 | 794k | bool eof = FALSE, full; |
834 | | |
835 | 794k | i_zero(block_r); |
836 | | |
837 | 794k | while ((ret = ctx->parse_next_block(ctx, block_r)) == 0) { |
838 | 602 | ret = message_parser_read_more(ctx, block_r, &full); |
839 | 602 | if (ret == 0) { |
840 | 0 | i_assert(!ctx->input->blocking); |
841 | 0 | return 0; |
842 | 0 | } |
843 | 602 | if (ret == -1) { |
844 | 153 | i_assert(!eof); |
845 | 153 | eof = TRUE; |
846 | 153 | } |
847 | 602 | } |
848 | | |
849 | 794k | block_r->part = ctx->part; |
850 | | |
851 | 794k | if (ret < 0 && ctx->part != NULL) { |
852 | | /* Successful EOF or unexpected failure */ |
853 | 4.98k | i_assert(ctx->input->eof || ctx->input->closed || |
854 | 4.98k | ctx->input->stream_errno != 0 || |
855 | 4.98k | ctx->broken_reason != NULL); |
856 | 10.6k | while (ctx->part->parent != NULL) |
857 | 5.62k | message_part_finish(ctx); |
858 | 4.98k | } |
859 | | |
860 | 794k | if (block_r->size == 0) { |
861 | | /* data isn't supposed to be read, so make sure it's NULL */ |
862 | 785k | block_r->data = NULL; |
863 | 785k | } |
864 | 794k | return ret; |
865 | 794k | } |
866 | | |
867 | | #undef message_parser_parse_header |
868 | | void message_parser_parse_header(struct message_parser_ctx *ctx, |
869 | | struct message_size *hdr_size, |
870 | | message_part_header_callback_t *callback, |
871 | | void *context) |
872 | 0 | { |
873 | 0 | struct message_block block; |
874 | 0 | int ret; |
875 | |
|
876 | 0 | while ((ret = message_parser_parse_next_block(ctx, &block)) > 0) { |
877 | 0 | T_BEGIN { |
878 | 0 | callback(block.part, block.hdr, context); |
879 | 0 | } T_END; |
880 | | |
881 | 0 | if (block.hdr == NULL) |
882 | 0 | break; |
883 | 0 | } |
884 | 0 | i_assert(ret != 0); |
885 | 0 | i_assert(ctx->part != NULL); |
886 | | |
887 | 0 | if (ret < 0) T_BEGIN { |
888 | | /* well, can't return error so fake end of headers */ |
889 | 0 | callback(ctx->part, NULL, context); |
890 | 0 | } T_END; |
891 | | |
892 | 0 | *hdr_size = ctx->part->header_size; |
893 | 0 | } |
894 | | |
895 | | #undef message_parser_parse_body |
896 | | void message_parser_parse_body(struct message_parser_ctx *ctx, |
897 | | message_part_header_callback_t *hdr_callback, |
898 | | void *context) |
899 | 0 | { |
900 | 0 | struct message_block block; |
901 | 0 | int ret; |
902 | |
|
903 | 0 | while ((ret = message_parser_parse_next_block(ctx, &block)) > 0) { |
904 | 0 | if (block.size == 0 && hdr_callback != NULL) T_BEGIN { |
905 | 0 | hdr_callback(block.part, block.hdr, context); |
906 | 0 | } T_END; |
907 | 0 | } |
908 | 0 | i_assert(ret != 0); |
909 | 0 | } |