Coverage Report

Created: 2025-07-23 07:29

/src/suricata7/libhtp/htp/htp_multipart.c
Line
Count
Source (jump to first uncovered line)
1
/***************************************************************************
2
 * Copyright (c) 2009-2010 Open Information Security Foundation
3
 * Copyright (c) 2010-2013 Qualys, Inc.
4
 * All rights reserved.
5
 * 
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are
8
 * met:
9
 * 
10
 * - Redistributions of source code must retain the above copyright
11
 *   notice, this list of conditions and the following disclaimer.
12
13
 * - Redistributions in binary form must reproduce the above copyright
14
 *   notice, this list of conditions and the following disclaimer in the
15
 *   documentation and/or other materials provided with the distribution.
16
17
 * - Neither the name of the Qualys, Inc. nor the names of its
18
 *   contributors may be used to endorse or promote products derived from
19
 *   this software without specific prior written permission.
20
 * 
21
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
 ***************************************************************************/
33
34
/**
35
 * @file
36
 * @author Ivan Ristic <ivanr@webkreator.com>
37
 */
38
39
#include "htp_config_auto.h"
40
41
#include "htp_private.h"
42
43
/**
44
 * Determines the type of a Content-Disposition parameter.
45
 *
46
 * @param[in] data
47
 * @param[in] startpos
48
 * @param[in] pos
49
 * @return CD_PARAM_OTHER, CD_PARAM_NAME or CD_PARAM_FILENAME.
50
 */
51
0
static int htp_mpartp_cd_param_type(unsigned char *data, size_t startpos, size_t endpos) {
52
0
    if ((endpos - startpos) == 4) {
53
0
        if (memcmp(data + startpos, "name", 4) == 0) return CD_PARAM_NAME;
54
0
    } else if ((endpos - startpos) == 8) {
55
0
        if (memcmp(data + startpos, "filename", 8) == 0) return CD_PARAM_FILENAME;
56
0
    }
57
58
0
    return CD_PARAM_OTHER;
59
0
}
60
61
0
/**
 * Gives access to the multipart structure embedded in the parser.
 *
 * @param[in] parser
 * @return Pointer to the parser's multipart member.
 */
htp_multipart_t *htp_mpartp_get_multipart(htp_mpartp_t *parser) {
    htp_multipart_t *multipart = &parser->multipart;

    return multipart;
}
64
65
/**
66
 * Decodes a C-D header value. This is impossible to do correctly without a
67
 * parsing personality because most browsers are broken:
68
 *  - Firefox encodes " as \", and \ is not encoded.
69
 *  - Chrome encodes " as %22.
70
 *  - IE encodes " as \", and \ is not encoded.
71
 *  - Opera encodes " as \" and \ as \\.
72
 * @param[in] b
73
 */
74
0
static void htp_mpart_decode_quoted_cd_value_inplace(bstr *b) {
75
0
    unsigned char *s = bstr_ptr(b);
76
0
    unsigned char *d = bstr_ptr(b);
77
0
    size_t len = bstr_len(b);
78
0
    size_t pos = 0;
79
80
0
    while (pos < len) {
81
        // Ignore \ when before \ or ".
82
0
        if ((*s == '\\')&&(pos + 1 < len)&&((*(s + 1) == '"')||(*(s + 1) == '\\'))) {
83
0
            s++;
84
0
            pos++;
85
0
        }
86
87
0
        *d++ = *s++;
88
0
        pos++;
89
0
    }
90
91
0
    bstr_adjust_len(b, len - (s - d));
92
0
}
93
94
/**
95
 * Parses the Content-Disposition part header.
96
 *
97
 * @param[in] part
98
 * @return HTP_OK on success (header found and parsed), HTP_DECLINED if there is no C-D header or if
99
 *         it could not be processed, and HTP_ERROR on fatal error.
100
 */
101
0
htp_status_t htp_mpart_part_parse_c_d(htp_multipart_part_t *part) {
102
    // Find the C-D header.
103
0
    htp_header_t *h = htp_table_get_c(part->headers, "content-disposition");
104
0
    if (h == NULL) {        
105
0
        part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN;
106
0
        return HTP_DECLINED;
107
0
    }
108
109
    // Require "form-data" at the beginning of the header.
110
0
    if (bstr_index_of_c(h->value, "form-data") != 0) {        
111
0
        part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
112
0
        return HTP_DECLINED;
113
0
    }
114
115
    // The parsing starts here.
116
0
    unsigned char *data = bstr_ptr(h->value);
117
0
    size_t len = bstr_len(h->value);
118
0
    size_t pos = 9; // Start after "form-data"
119
120
    // Main parameter parsing loop (once per parameter).
121
0
    while (pos < len) {              
122
        // Ignore whitespace.
123
0
        while ((pos < len) && isspace(data[pos])) pos++;
124
0
        if (pos == len) {            
125
0
            part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
126
0
            return HTP_DECLINED;
127
0
        }
128
129
        // Expecting a semicolon.
130
0
        if (data[pos] != ';') {            
131
0
            part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
132
0
            return HTP_DECLINED;
133
0
        }
134
0
        pos++;
135
136
        // Go over the whitespace before parameter name.
137
0
        while ((pos < len) && isspace(data[pos])) pos++;
138
0
        if (pos == len) {            
139
0
            part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
140
0
            return HTP_DECLINED;
141
0
        }
142
143
        // Found the starting position of the parameter name.
144
0
        size_t start = pos;
145
146
        // Look for the ending position.
147
0
        while ((pos < len) && (!isspace(data[pos]) && (data[pos] != '='))) pos++;
148
0
        if (pos == len) {            
149
0
            part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
150
0
            return HTP_DECLINED;
151
0
        }
152
153
        // Ending position is in "pos" now.
154
155
        // Determine parameter type ("name", "filename", or other).
156
0
        int param_type = htp_mpartp_cd_param_type(data, start, pos);        
157
158
        // Ignore whitespace after parameter name, if any.
159
0
        while ((pos < len) && isspace(data[pos])) pos++;
160
0
        if (pos == len) {            
161
0
            part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
162
0
            return HTP_DECLINED;
163
0
        }
164
165
        // Equals.
166
0
        if (data[pos] != '=') {            
167
0
            part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
168
0
            return HTP_DECLINED;
169
0
        }
170
0
        pos++;
171
172
        // Go over the whitespace before the parameter value.
173
0
        while ((pos < len) && isspace(data[pos])) pos++;
174
0
        if (pos == len) {            
175
0
            part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
176
0
            return HTP_DECLINED;
177
0
        }
178
        
179
        // Expecting a double quote.
180
0
        if (data[pos] != '"') {            
181
            // Bare string or non-standard quoting, which we don't like.
182
0
            part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
183
0
            return HTP_DECLINED;
184
0
        }        
185
186
0
        pos++; // Over the double quote.
187
188
        // We have the starting position of the value.
189
0
        start = pos;
190
191
        // Find the end of the value.
192
0
        while ((pos < len) && (data[pos] != '"')) {
193
            // Check for escaping.
194
0
            if (data[pos] == '\\') {
195
0
                if (pos + 1 >= len) {
196
                    // A backslash as the last character in the C-D header.
197
0
                    part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
198
0
                    return HTP_DECLINED;
199
0
                }
200
201
                // Allow " and \ to be escaped.
202
0
                if ((data[pos + 1] == '"')||(data[pos + 1] == '\\')) {
203
                    // Go over the quoted character.
204
0
                    pos++;
205
0
                }
206
0
            }
207
208
0
            pos++;
209
0
        }
210
211
        // If we've reached the end of the string that means the
212
        // value was not terminated properly (the second double quote is missing).
213
0
        if (pos == len) {            
214
0
            part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
215
0
            return HTP_DECLINED;
216
0
        }
217
218
        // Expecting the terminating double quote.
219
0
        if (data[pos] != '"') {            
220
0
            part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
221
0
            return HTP_DECLINED;
222
0
        }
223
224
0
        pos++; // Over the terminating double quote.
225
226
        // Finally, process the parameter value.
227
228
0
        switch (param_type) {
229
0
            case CD_PARAM_NAME:
230
                // Check that we have not seen the name parameter already.
231
0
                if (part->name != NULL) {                    
232
0
                    part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_REPEATED;
233
0
                    return HTP_DECLINED;
234
0
                }
235
                
236
0
                part->name = bstr_dup_mem(data + start, pos - start - 1);
237
0
                if (part->name == NULL) return HTP_ERROR;
238
239
0
                htp_mpart_decode_quoted_cd_value_inplace(part->name);
240
241
0
                break;
242
243
0
            case CD_PARAM_FILENAME:                
244
                // Check that we have not seen the filename parameter already.
245
0
                if (part->file != NULL) {                    
246
0
                    part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_REPEATED;
247
0
                    return HTP_DECLINED;
248
0
                }
249
 
250
0
                part->file = calloc(1, sizeof (htp_file_t));
251
0
                if (part->file == NULL) return HTP_ERROR;
252
253
0
                part->file->fd = -1;
254
0
                part->file->source = HTP_FILE_MULTIPART;
255
256
0
                part->file->filename = bstr_dup_mem(data + start, pos - start - 1);
257
0
                if (part->file->filename == NULL) {
258
0
                    free(part->file);
259
0
                    return HTP_ERROR;
260
0
                }
261
262
0
                htp_mpart_decode_quoted_cd_value_inplace(part->file->filename);
263
                
264
0
                break;
265
                
266
0
            default:
267
                // Unknown parameter.                
268
0
                part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_UNKNOWN;
269
0
                return HTP_DECLINED;
270
0
                break;
271
0
        }       
272
273
        // Continue to parse the next parameter, if any.
274
0
    }
275
276
0
    return HTP_OK;
277
0
}
278
279
/**
280
 * Parses the Content-Type part header, if present.
281
 *
282
 * @param[in] part
283
 * @return HTP_OK on success, HTP_DECLINED if the C-T header is not present, and HTP_ERROR on failure.
284
 */
285
0
static htp_status_t htp_mpart_part_parse_c_t(htp_multipart_part_t *part) {
286
0
    htp_header_t *h = (htp_header_t *) htp_table_get_c(part->headers, "content-type");
287
0
    if (h == NULL) return HTP_DECLINED;
288
0
    return htp_parse_ct_header(h->value, &part->content_type);
289
0
}
290
291
/**
292
 * Processes part headers.
293
 *
294
 * @param[in] part
295
 * @return HTP_OK on success, HTP_ERROR on failure.
296
 */
297
0
htp_status_t htp_mpart_part_process_headers(htp_multipart_part_t *part) {
298
0
    if (htp_mpart_part_parse_c_d(part) == HTP_ERROR) return HTP_ERROR;
299
0
    if (htp_mpart_part_parse_c_t(part) == HTP_ERROR) return HTP_ERROR;
300
301
0
    return HTP_OK;
302
0
}
303
304
/**
305
 * Parses one part header.
306
 *
307
 * @param[in] part
308
 * @param[in] data
309
 * @param[in] len
310
 * @return HTP_OK on success, HTP_DECLINED on parsing error, HTP_ERROR on fatal error.
311
 */
312
0
htp_status_t htp_mpartp_parse_header(htp_multipart_part_t *part, const unsigned char *data, size_t len) {
313
0
    size_t name_start, name_end;
314
0
    size_t value_start, value_end;
315
   
316
    // We do not allow NUL bytes here.
317
0
    if (memchr(data, '\0', len) != NULL) {        
318
0
        part->parser->multipart.flags |= HTP_MULTIPART_NUL_BYTE;
319
0
        return HTP_DECLINED;
320
0
    }
321
322
0
    name_start = 0;
323
324
    // Look for the starting position of the name first.
325
0
    size_t colon_pos = 0;
326
327
0
    while ((colon_pos < len)&&(htp_is_space(data[colon_pos]))) colon_pos++;
328
0
    if (colon_pos != 0) {
329
        // Whitespace before header name.
330
0
        part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
331
0
        return HTP_DECLINED;
332
0
    }
333
334
    // Now look for the colon.
335
0
    while ((colon_pos < len) && (data[colon_pos] != ':')) colon_pos++;
336
337
0
    if (colon_pos == len) {
338
        // Missing colon.
339
0
        part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
340
0
        return HTP_DECLINED;
341
0
    }
342
343
0
    if (colon_pos == 0) {
344
        // Empty header name.
345
0
        part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
346
0
        return HTP_DECLINED;
347
0
    }
348
349
0
    name_end = colon_pos;
350
351
    // Ignore LWS after header name.
352
0
    size_t prev = name_end;
353
0
    while ((prev > name_start) && (htp_is_lws(data[prev - 1]))) {
354
0
        prev--;
355
0
        name_end--;
356
357
        // LWS after field name. Not allowing for now.
358
0
        part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
359
0
        return HTP_DECLINED;
360
0
    }
361
362
    // Header value.
363
364
0
    value_start = colon_pos + 1;
365
366
    // Ignore LWS before value.
367
0
    while ((value_start < len) && (htp_is_lws(data[value_start]))) value_start++;
368
369
0
    if (value_start == len) {
370
        // No header value.
371
0
        part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
372
0
        return HTP_DECLINED;
373
0
    }   
374
375
    // Assume the value is at the end.
376
0
    value_end = len;
377
378
    // Check that the header name is a token.
379
0
    size_t i = name_start;
380
0
    while (i < name_end) {
381
0
        if (!htp_is_token(data[i])) {
382
0
            part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
383
0
            return HTP_DECLINED;
384
0
        }
385
386
0
        i++;
387
0
    }
388
389
    // Now extract the name and the value.
390
0
    htp_header_t *h = calloc(1, sizeof (htp_header_t));
391
0
    if (h == NULL) return HTP_ERROR;
392
393
0
    h->name = bstr_dup_mem(data + name_start, name_end - name_start);
394
0
    if (h->name == NULL) {
395
0
        free(h);
396
0
        return HTP_ERROR;
397
0
    }
398
399
0
    h->value = bstr_dup_mem(data + value_start, value_end - value_start);
400
0
    if (h->value == NULL) {
401
0
        bstr_free(h->name);
402
0
        free(h);
403
0
        return HTP_ERROR;
404
0
    }
405
406
0
    if ((bstr_cmp_c_nocase(h->name, "content-disposition") != 0) && (bstr_cmp_c_nocase(h->name, "content-type") != 0)) {
407
0
        part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_UNKNOWN;
408
0
    }
409
410
    // Check if the header already exists.
411
0
    htp_header_t * h_existing = htp_table_get(part->headers, h->name);
412
0
    if (h_existing != NULL) {
413
        // Add to the existing header.
414
0
        bstr *new_value = bstr_expand(h_existing->value, bstr_len(h_existing->value)
415
0
                + 2 + bstr_len(h->value));
416
0
        if (new_value == NULL) {
417
0
            bstr_free(h->name);
418
0
            bstr_free(h->value);
419
0
            free(h);
420
0
            return HTP_ERROR;
421
0
        }
422
423
0
        h_existing->value = new_value;
424
0
        bstr_add_mem_noex(h_existing->value, ", ", 2);
425
0
        bstr_add_noex(h_existing->value, h->value);
426
427
        // The header is no longer needed.
428
0
        bstr_free(h->name);
429
0
        bstr_free(h->value);
430
0
        free(h);
431
432
        // Keep track of same-name headers.
433
0
        h_existing->flags |= HTP_MULTIPART_PART_HEADER_REPEATED;
434
0
        part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_REPEATED;
435
0
    } else {
436
        // Add as a new header.
437
0
        if (htp_table_add(part->headers, h->name, h) != HTP_OK) {
438
0
            bstr_free(h->value);
439
0
            bstr_free(h->name);
440
0
            free(h);
441
0
            return HTP_ERROR;
442
0
        }
443
0
    }
444
445
0
    return HTP_OK;
446
0
}
447
448
/**
449
 * Creates a new Multipart part.
450
 *
451
 * @param[in] parser
452
 * @return New part instance, or NULL on memory allocation failure.
453
 */
454
0
htp_multipart_part_t *htp_mpart_part_create(htp_mpartp_t *parser) {
455
0
    htp_multipart_part_t * part = calloc(1, sizeof (htp_multipart_part_t));
456
0
    if (part == NULL) return NULL;
457
458
0
    part->headers = htp_table_create(4);
459
0
    if (part->headers == NULL) {
460
0
        free(part);
461
0
        return NULL;
462
0
    }
463
464
0
    part->parser = parser;
465
0
    bstr_builder_clear(parser->part_data_pieces);
466
0
    bstr_builder_clear(parser->part_header_pieces);
467
468
0
    return part;
469
0
}
470
471
/**
472
 * Destroys a part.
473
 *
474
 * @param[in] part
475
 * @param[in] gave_up_data
476
 */
477
0
void htp_mpart_part_destroy(htp_multipart_part_t *part, int gave_up_data) {
478
0
    if (part == NULL) return;
479
480
0
    if (part->file != NULL) {
481
0
        bstr_free(part->file->filename);
482
483
0
        if (part->file->tmpname != NULL) {
484
0
            unlink(part->file->tmpname);
485
0
            free(part->file->tmpname);
486
0
        }
487
488
0
        free(part->file);
489
0
        part->file = NULL;
490
0
    }
491
492
0
    if ((!gave_up_data) || (part->type != MULTIPART_PART_TEXT)) {
493
0
        bstr_free(part->name);
494
0
        bstr_free(part->value);
495
0
    }
496
497
0
    bstr_free(part->content_type);
498
499
0
    if (part->headers != NULL) {
500
0
        htp_header_t *h = NULL;
501
0
        for (size_t i = 0, n = htp_table_size(part->headers); i < n; i++) {
502
0
            h = htp_table_get_index(part->headers, i, NULL);
503
0
            bstr_free(h->name);
504
0
            bstr_free(h->value);
505
0
            free(h);
506
0
        }
507
508
0
        htp_table_destroy(part->headers);
509
0
    }
510
511
0
    free(part);
512
0
}
513
514
/**
515
 * Finalizes part processing.
516
 *
517
 * @param[in] part
518
 * @return HTP_OK on success, HTP_ERROR on failure.
519
 */
520
0
htp_status_t htp_mpart_part_finalize_data(htp_multipart_part_t *part) {
521
    // Determine if this part is the epilogue.
522
523
0
    if (part->parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) {
524
0
        if (part->type == MULTIPART_PART_UNKNOWN) {
525
            // Assume that the unknown part after the last boundary is the epilogue.            
526
0
            part->parser->current_part->type = MULTIPART_PART_EPILOGUE;
527
528
            // But if we've already seen a part we thought was the epilogue,
529
            // raise HTP_MULTIPART_PART_UNKNOWN. Multiple epilogues are not allowed.
530
0
            if (part->parser->multipart.flags & HTP_MULTIPART_HAS_EPILOGUE) {
531
0
                part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN;
532
0
            }
533
534
0
            part->parser->multipart.flags |= HTP_MULTIPART_HAS_EPILOGUE;
535
0
        } else {
536
0
            part->parser->multipart.flags |= HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY;
537
0
        }
538
0
    }
539
540
    // Sanity checks.
541
542
    // Have we seen complete part headers? If we have not, that means that the part ended prematurely.
543
0
    if ((part->parser->current_part->type != MULTIPART_PART_EPILOGUE) && (part->parser->current_part_mode != MODE_DATA)) {
544
0
        part->parser->multipart.flags |= HTP_MULTIPART_PART_INCOMPLETE;
545
0
    }
546
547
    // Have we been able to determine the part type? If not, this means
548
    // that the part did not contain the C-D header.
549
0
    if (part->type == MULTIPART_PART_UNKNOWN) {
550
0
        part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN;
551
0
    }
552
553
    // Finalize part value.   
554
555
0
    if (part->type == MULTIPART_PART_FILE) {
556
        // Notify callbacks about the end of the file.
557
0
        htp_mpartp_run_request_file_data_hook(part, NULL, 0);
558
559
        // If we are storing the file to disk, close the file descriptor.
560
0
        if (part->file->fd != -1) {
561
0
            close(part->file->fd);
562
0
        }
563
0
    } else {
564
        // Combine value pieces into a single buffer.
565
0
        if (bstr_builder_size(part->parser->part_data_pieces) > 0) {
566
0
            part->value = bstr_builder_to_str(part->parser->part_data_pieces);
567
0
            bstr_builder_clear(part->parser->part_data_pieces);
568
0
        }
569
0
    }
570
571
0
    return HTP_OK;
572
0
}
573
574
0
/**
 * Accounts for a chunk of file data and passes it to the callbacks
 * registered on the configuration's hook_request_file_data hook. Nothing is
 * done when the parser has no configuration.
 *
 * @param[in] part
 * @param[in] data
 * @param[in] len
 * @return HTP_OK if the parser has no configuration; otherwise the status
 *         returned by htp_hook_run_all().
 */
htp_status_t htp_mpartp_run_request_file_data_hook(htp_multipart_part_t *part, const unsigned char *data, size_t len) {
    htp_file_data_t file_data;

    // Without a configuration there are no hooks to invoke.
    if (part->parser->cfg == NULL) return HTP_OK;

    // Keep track of the file length.
    part->file->len += len;

    // Package data for the callbacks.
    file_data.file = part->file;
    file_data.data = data;
    file_data.len = len;

    // Send data to callbacks; their verdict is returned as is.
    return htp_hook_run_all(part->parser->cfg->hook_request_file_data, &file_data);
}
592
593
/**
594
 * Handles part data.
595
 *
596
 * @param[in] part
597
 * @param[in] data
598
 * @param[in] len
599
 * @param[in] is_line
600
 * @return HTP_OK on success, HTP_ERROR on failure.
601
 */
602
0
htp_status_t htp_mpart_part_handle_data(htp_multipart_part_t *part, const unsigned char *data, size_t len, int is_line) {
603
    #if HTP_DEBUG
604
    fprintf(stderr, "Part type %d mode %d is_line %d\n", part->type, part->parser->current_part_mode, is_line);
605
    fprint_raw_data(stderr, "htp_mpart_part_handle_data: data chunk", data, len);
606
    #endif
607
608
    // Keep track of raw part length.
609
0
    part->len += len;
610
611
    // If we're processing a part that came after the last boundary, then we're not sure if it
612
    // is the epilogue part or some other part (in case of evasion attempt). For that reason we
613
    // will keep all its data in the part_data_pieces structure. If it ends up not being the
614
    // epilogue, this structure will be cleared.
615
0
    if ((part->parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) && (part->type == MULTIPART_PART_UNKNOWN)) {
616
0
        bstr_builder_append_mem(part->parser->part_data_pieces, data, len);
617
0
    }
618
619
0
    if (part->parser->current_part_mode == MODE_LINE) {
620
        // Line mode.       
621
622
0
        if (is_line) {
623
            // End of the line.
624
625
0
            bstr *line = NULL;
626
627
            // If this line came to us in pieces, combine them now into a single buffer.
628
0
            if (bstr_builder_size(part->parser->part_header_pieces) > 0) {
629
0
                bstr_builder_append_mem(part->parser->part_header_pieces, data, len);
630
0
                line = bstr_builder_to_str(part->parser->part_header_pieces);
631
0
                if (line == NULL) return HTP_ERROR;
632
0
                bstr_builder_clear(part->parser->part_header_pieces);
633
634
0
                data = bstr_ptr(line);
635
0
                len = bstr_len(line);
636
0
            }
637
638
            // Ignore the line endings.
639
0
            if (len > 1) {
640
0
                if (data[len - 1] == LF) len--;
641
0
                if (data[len - 1] == CR) len--;
642
0
            } else if (len > 0) {
643
0
                if (data[len - 1] == LF) len--;
644
0
            }
645
646
            // Is it an empty line?
647
0
            if (len == 0) {
648
                // Empty line; process headers and switch to data mode.
649
650
                // Process the pending header, if any.
651
0
                if (part->parser->pending_header_line != NULL) {
652
0
                    if (htp_mpartp_parse_header(part, bstr_ptr(part->parser->pending_header_line),
653
0
                            bstr_len(part->parser->pending_header_line)) == HTP_ERROR)
654
0
                    {
655
0
                        bstr_free(line);
656
0
                        return HTP_ERROR;
657
0
                    }
658
659
0
                    bstr_free(part->parser->pending_header_line);
660
0
                    part->parser->pending_header_line = NULL;
661
0
                }
662
663
0
                if (htp_mpart_part_process_headers(part) == HTP_ERROR) {
664
0
                    bstr_free(line);
665
0
                    return HTP_ERROR;
666
0
                }
667
668
0
                part->parser->current_part_mode = MODE_DATA;
669
0
                bstr_builder_clear(part->parser->part_header_pieces);
670
671
0
                if (part->file != NULL) {
672
                    // Changing part type because we have a filename.
673
0
                    part->type = MULTIPART_PART_FILE;
674
675
0
                    if ((part->parser->extract_files) && (part->parser->file_count < part->parser->extract_limit)) {
676
0
                        char buf[255];
677
                        
678
0
                        strncpy(buf, part->parser->extract_dir, 254);
679
0
                        strncat(buf, "/libhtp-multipart-file-XXXXXX", 254 - strlen(buf));
680
681
0
                        part->file->tmpname = strdup(buf);
682
0
                        if (part->file->tmpname == NULL) {
683
0
                            bstr_free(line);
684
0
                            return HTP_ERROR;
685
0
                        }
686
687
0
                        mode_t previous_mask = umask(S_IXUSR | S_IRWXG | S_IRWXO);
688
0
                        part->file->fd = mkstemp(part->file->tmpname);
689
0
                        umask(previous_mask);
690
691
0
                        if (part->file->fd < 0) {
692
0
                            bstr_free(line);
693
0
                            return HTP_ERROR;
694
0
                        }
695
696
0
                        part->parser->file_count++;
697
0
                    }
698
0
                } else if (part->name != NULL) {
699
                    // Changing part type because we have a name.
700
0
                    part->type = MULTIPART_PART_TEXT;
701
0
                    bstr_builder_clear(part->parser->part_data_pieces);
702
0
                } else {
703
                    // Do nothing; the type stays MULTIPART_PART_UNKNOWN.
704
0
                }
705
0
            } else {
706
                // Not an empty line.
707
708
                // Is there a pending header?
709
0
                if (part->parser->pending_header_line == NULL) {
710
0
                    if (line != NULL) {
711
0
                        part->parser->pending_header_line = line;
712
0
                        line = NULL;
713
0
                    } else {
714
0
                        part->parser->pending_header_line = bstr_dup_mem(data, len);
715
0
                        if (part->parser->pending_header_line == NULL) return HTP_ERROR;
716
0
                    }
717
0
                } else {
718
                    // Is this a folded line?
719
0
                    if (isspace(data[0])) {
720
                        // Folding; add to the existing line.
721
0
                        part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_FOLDING;
722
0
                        part->parser->pending_header_line = bstr_add_mem(part->parser->pending_header_line, data, len);
723
0
                        if (part->parser->pending_header_line == NULL) {
724
0
                            bstr_free(line);
725
0
                            return HTP_ERROR;
726
0
                        }
727
0
                    } else {
728
                        // Process the pending header line.                        
729
0
                        if (htp_mpartp_parse_header(part, bstr_ptr(part->parser->pending_header_line),
730
0
                                bstr_len(part->parser->pending_header_line)) == HTP_ERROR)
731
0
                        {
732
0
                            bstr_free(line);
733
0
                            return HTP_ERROR;
734
0
                        }
735
                        
736
0
                        bstr_free(part->parser->pending_header_line);
737
738
0
                        if (line != NULL) {
739
0
                            part->parser->pending_header_line = line;
740
0
                            line = NULL;
741
0
                        } else {
742
0
                            part->parser->pending_header_line = bstr_dup_mem(data, len);
743
0
                            if (part->parser->pending_header_line == NULL) return HTP_ERROR;
744
0
                        }
745
0
                    }
746
0
                }
747
0
            }
748
749
0
            bstr_free(line);
750
0
            line = NULL;
751
0
        } else {
752
            // Not end of line; keep the data chunk for later.
753
0
            bstr_builder_append_mem(part->parser->part_header_pieces, data, len);
754
0
        }
755
0
    } else {
756
        // Data mode; keep the data chunk for later (but not if it is a file).
757
0
        switch (part->type) {
758
0
            case MULTIPART_PART_EPILOGUE:
759
0
            case MULTIPART_PART_PREAMBLE:
760
0
            case MULTIPART_PART_TEXT:
761
0
            case MULTIPART_PART_UNKNOWN:
762
                // Make a copy of the data in RAM.
763
0
                bstr_builder_append_mem(part->parser->part_data_pieces, data, len);
764
0
                break;
765
766
0
            case MULTIPART_PART_FILE:
767
                // Invoke file data callbacks.
768
0
                htp_mpartp_run_request_file_data_hook(part, data, len);
769
770
                // Optionally, store the data in a file.
771
0
                if (part->file->fd != -1) {
772
0
                    if (write(part->file->fd, data, len) < 0) {
773
0
                        return HTP_ERROR;
774
0
                    }
775
0
                }
776
0
                break;
777
                
778
0
            default:
779
                // Internal error.
780
0
                return HTP_ERROR;
781
0
                break;
782
0
        }
783
0
    }
784
785
0
    return HTP_OK;
786
0
}
787
788
/**
789
 * Handles data, creating new parts as necessary.
790
 *
791
 * @param[in] mpartp
792
 * @param[in] data
793
 * @param[in] len
794
 * @param[in] is_line
795
 * @return HTP_OK on success, HTP_ERROR on failure.
796
 */
797
0
static htp_status_t htp_mpartp_handle_data(htp_mpartp_t *parser, const unsigned char *data, size_t len, int is_line) {
798
0
    if (len == 0) return HTP_OK;
799
800
    // Do we have a part already?
801
0
    if (parser->current_part == NULL) {
802
        // Create a new part.
803
0
        parser->current_part = htp_mpart_part_create(parser);
804
0
        if (parser->current_part == NULL) return HTP_ERROR;
805
806
0
        if (parser->multipart.boundary_count == 0) {
807
            // We haven't seen a boundary yet, so this must be the preamble part.
808
0
            parser->current_part->type = MULTIPART_PART_PREAMBLE;
809
0
            parser->multipart.flags |= HTP_MULTIPART_HAS_PREAMBLE;
810
0
            parser->current_part_mode = MODE_DATA;
811
0
        } else {
812
            // Part after preamble.
813
0
            parser->current_part_mode = MODE_LINE;
814
0
        }
815
816
        // Add part to the list.        
817
0
        htp_list_push(parser->multipart.parts, parser->current_part);
818
819
        #ifdef HTP_DEBUG
820
        fprintf(stderr, "Created new part type %d\n", parser->current_part->type);
821
        #endif
822
0
    }
823
824
    // Send data to the part.
825
0
    return htp_mpart_part_handle_data(parser->current_part, data, len, is_line);
826
0
}
827
828
/**
829
 * Handles a boundary event, which means that it will finalize a part if one exists.
830
 *
831
 * @param[in] mpartp
832
 * @return HTP_OK on success, HTP_ERROR on failure.
833
 */
834
0
static htp_status_t htp_mpartp_handle_boundary(htp_mpartp_t *parser) {
835
    #if HTP_DEBUG
836
    fprintf(stderr, "htp_mpartp_handle_boundary\n");
837
    #endif
838
839
0
    if (parser->current_part != NULL) {
840
0
        if (htp_mpart_part_finalize_data(parser->current_part) != HTP_OK) {
841
0
            return HTP_ERROR;
842
0
        }
843
844
        // We're done with this part
845
0
        parser->current_part = NULL;
846
847
        // Revert to line mode
848
0
        parser->current_part_mode = MODE_LINE;
849
0
    }
850
851
0
    return HTP_OK;
852
0
}
853
854
0
/**
 * Stores the boundary in the form used for matching inside the payload:
 * CR, LF and two dashes followed by the boundary bytes, NUL-terminated.
 * The boundary bytes are copied verbatim (no case conversion takes place).
 * The parser is then placed in boundary-matching state.
 *
 * @param[in] parser
 * @param[in] data Boundary bytes.
 * @param[in] len Number of boundary bytes.
 * @return HTP_OK on success, HTP_ERROR on NULL parameters, on a length that
 *         cannot be represented once the prefix is added, or on allocation
 *         failure.
 */
static htp_status_t htp_mpartp_init_boundary(htp_mpartp_t *parser, unsigned char *data, size_t len) {
    if ((parser == NULL) || (data == NULL)) return HTP_ERROR;

    // Four prefix bytes and one terminator are added to len below; refuse
    // lengths for which that sum would wrap around.
    if (len > SIZE_MAX - 5) return HTP_ERROR;

    parser->multipart.boundary = malloc(len + 4 + 1);
    if (parser->multipart.boundary == NULL) return HTP_ERROR;

    // Record the length only once the buffer it describes exists.
    parser->multipart.boundary_len = len + 4;

    parser->multipart.boundary[0] = CR;
    parser->multipart.boundary[1] = LF;
    parser->multipart.boundary[2] = '-';
    parser->multipart.boundary[3] = '-';

    memcpy(parser->multipart.boundary + 4, data, len);

    parser->multipart.boundary[parser->multipart.boundary_len] = '\0';

    // We're starting in boundary-matching mode. The first boundary can appear without the
    // CRLF, and our starting state expects that (matching begins at offset 2, the first
    // dash). If we encounter non-boundary data, the state will switch to data mode. Then,
    // if the data is CRLF or LF, we will go back to boundary matching. Thus, we handle all
    // the possibilities.

    parser->parser_state = STATE_BOUNDARY;
    parser->boundary_match_pos = 2;

    return HTP_OK;
}
884
885
0
/**
 * Allocates and initializes a multipart parser for the given boundary.
 *
 * On success the boundary string is consumed: the parser keeps its own copy
 * and the supplied bstr is freed here. On failure the boundary is left
 * untouched and still belongs to the caller.
 *
 * @param[in] cfg Configuration; supplies the file extraction settings.
 * @param[in] boundary Boundary string.
 * @param[in] flags Initial multipart flags.
 * @return New parser instance, or NULL on invalid parameters or memory
 *         allocation failure.
 */
htp_mpartp_t *htp_mpartp_create(htp_cfg_t *cfg, bstr *boundary, uint64_t flags) {
    if ((cfg == NULL) || (boundary == NULL)) return NULL;

    htp_mpartp_t *parser = calloc(1, sizeof (*parser));
    if (parser == NULL) return NULL;

    parser->cfg = cfg;

    // Working buffers; any failure tears down whatever was built so far.
    parser->boundary_pieces = bstr_builder_create();
    if (parser->boundary_pieces == NULL) goto fail;

    parser->part_data_pieces = bstr_builder_create();
    if (parser->part_data_pieces == NULL) goto fail;

    parser->part_header_pieces = bstr_builder_create();
    if (parser->part_header_pieces == NULL) goto fail;

    parser->multipart.parts = htp_list_create(64);
    if (parser->multipart.parts == NULL) goto fail;

    parser->multipart.flags = flags;
    parser->parser_state = STATE_INIT;

    // File extraction settings; a negative configured limit selects the default.
    parser->extract_files = cfg->extract_request_files;
    parser->extract_dir = cfg->tmpdir;
    parser->extract_limit = (cfg->extract_request_files_limit >= 0)
            ? cfg->extract_request_files_limit
            : DEFAULT_FILE_EXTRACT_LIMIT;

    parser->handle_data = htp_mpartp_handle_data;
    parser->handle_boundary = htp_mpartp_handle_boundary;

    // Initialize the boundary.
    if (htp_mpartp_init_boundary(parser, bstr_ptr(boundary), bstr_len(boundary)) != HTP_OK) goto fail;

    // The parser now holds its own copy of the boundary, so the
    // string handed to us is no longer needed.
    bstr_free(boundary);

    return parser;

fail:
    htp_mpartp_destroy(parser);
    return NULL;
}
943
944
245k
/**
 * Releases a multipart parser together with its boundary copy, its working
 * buffers and every part stored in its list of parts.
 *
 * @param[in] parser Parser to destroy; NULL is accepted and ignored.
 */
void htp_mpartp_destroy(htp_mpartp_t *parser) {
    if (parser == NULL) return;

    // free() accepts NULL, so no separate check is required.
    free(parser->multipart.boundary);

    bstr_builder_destroy(parser->boundary_pieces);
    bstr_builder_destroy(parser->part_header_pieces);
    bstr_free(parser->pending_header_line);
    bstr_builder_destroy(parser->part_data_pieces);

    // Free the parts, then the list that held them.
    if (parser->multipart.parts != NULL) {
        const size_t part_count = htp_list_size(parser->multipart.parts);
        size_t idx = 0;

        while (idx < part_count) {
            htp_multipart_part_t *current = htp_list_get(parser->multipart.parts, idx);
            htp_mpart_part_destroy(current, parser->gave_up_data);
            idx++;
        }

        htp_list_destroy(parser->multipart.parts);
    }

    free(parser);
}
968
969
/**
970
 * Processes set-aside data.
971
 *
972
 * @param[in] mpartp
973
 * @param[in] data
974
 * @param[in] pos
975
 * @param[in] startpos
976
 * @param[in] return_pos
977
 * @param[in] matched
978
 * @return HTP_OK on success, HTP_ERROR on failure.
979
 */
980
0
static htp_status_t htp_martp_process_aside(htp_mpartp_t *parser, int matched) {
981
    // The stored data pieces can contain up to one line. If we're in data mode and there
982
    // was no boundary match, things are straightforward -- we process everything as data.
983
    // If there was a match, we need to take care to not send the line ending as data, nor
984
    // anything that follows (because it's going to be a part of the boundary). Similarly,
985
    // when we are in line mode, we need to split the first data chunk, processing the first
986
    // part as line and the second part as data.
987
988
    #ifdef HTP_DEBUG
989
    fprintf(stderr, "mpartp_process_aside matched %d current_part_mode %d\n", matched, parser->current_part_mode);
990
    #endif
991
992
    // Do we need to do any chunk splitting?
993
0
    if (matched || (parser->current_part_mode == MODE_LINE)) {
994
        // Line mode or boundary match
995
996
        // Process the CR byte, if set aside.
997
0
        if ((!matched) && (parser->cr_aside)) {
998
            // Treat as part data, when there is not a match.
999
0
            parser->handle_data(parser, (unsigned char *) &"\r", 1, /* not a line */ 0);
1000
0
            parser->cr_aside = 0;
1001
0
        } else {
1002
            // Treat as boundary, when there is a match.
1003
0
            parser->cr_aside = 0;
1004
0
        }
1005
1006
        // We know that we went to match a boundary because
1007
        // we saw a new line. Now we have to find that line and
1008
        // process it. It's either going to be in the current chunk,
1009
        // or in the first stored chunk.
1010
0
        if (bstr_builder_size(parser->boundary_pieces) > 0) {
1011
0
            int first = 1;
1012
0
            for (size_t i = 0, n = htp_list_size(parser->boundary_pieces->pieces); i < n; i++) {
1013
0
                bstr *b = htp_list_get(parser->boundary_pieces->pieces, i);
1014
1015
0
                if (first) {
1016
0
                    first = 0;
1017
1018
                    // Split the first chunk.
1019
1020
0
                    if (!matched) {
1021
                        // In line mode, we are OK with line endings.
1022
0
                        parser->handle_data(parser, bstr_ptr(b), parser->boundary_candidate_pos, /* line */ 1);
1023
0
                    } else {
1024
                        // But if there was a match, the line ending belongs to the boundary.
1025
0
                        unsigned char *dx = bstr_ptr(b);
1026
0
                        size_t lx = parser->boundary_candidate_pos;
1027
1028
                        // Remove LF or CRLF.
1029
0
                        if ((lx > 0) && (dx[lx - 1] == LF)) {
1030
0
                            lx--;
1031
                            // Remove CR.
1032
0
                            if ((lx > 0) && (dx[lx - 1] == CR)) {
1033
0
                                lx--;
1034
0
                            }
1035
0
                        }
1036
1037
0
                        parser->handle_data(parser, dx, lx, /* not a line */ 0);
1038
0
                    }
1039
1040
                    // The second part of the split chunks belongs to the boundary
1041
                    // when matched, data otherwise.
1042
0
                    if (!matched) {
1043
0
                        parser->handle_data(parser, bstr_ptr(b) + parser->boundary_candidate_pos,
1044
0
                                bstr_len(b) - parser->boundary_candidate_pos, /* not a line */ 0);
1045
0
                    }
1046
0
                } else {
1047
                    // Do not send data if there was a boundary match. The stored
1048
                    // data belongs to the boundary.
1049
0
                    if (!matched) {
1050
0
                        parser->handle_data(parser, bstr_ptr(b), bstr_len(b), /* not a line */ 0);
1051
0
                    }
1052
0
                }
1053
0
            }
1054
1055
0
            bstr_builder_clear(parser->boundary_pieces);
1056
0
        }
1057
0
    } else {
1058
        // Data mode and no match.       
1059
1060
        // In data mode, we process the lone CR byte as data.
1061
0
        if (parser->cr_aside) {
1062
0
            parser->handle_data(parser, (const unsigned char *)&"\r", 1, /* not a line */ 0);
1063
0
            parser->cr_aside = 0;
1064
0
        }
1065
1066
        // We then process any pieces that we might have stored, also as data.
1067
0
        if (bstr_builder_size(parser->boundary_pieces) > 0) {
1068
0
            for (size_t i = 0, n = htp_list_size(parser->boundary_pieces->pieces); i < n; i++) {
1069
0
                bstr *b = htp_list_get(parser->boundary_pieces->pieces, i);
1070
0
                parser->handle_data(parser, bstr_ptr(b), bstr_len(b), /* not a line */ 0);
1071
0
            }
1072
1073
0
            bstr_builder_clear(parser->boundary_pieces);
1074
0
        }
1075
0
    }
1076
1077
0
    return HTP_OK;
1078
0
}
1079
1080
0
/**
 * Finishes parsing: flushes any set-aside data into the part that is still
 * open, finalizes that part, and discards whatever remains in the boundary
 * buffer. The payload is flagged as incomplete when the part that was open
 * at the end is anything other than the epilogue.
 *
 * @param[in] parser
 * @return HTP_OK on success, HTP_ERROR if the last part could not be finalized.
 */
htp_status_t htp_mpartp_finalize(htp_mpartp_t *parser) {
    if (parser->current_part != NULL) {
        // Process buffered data, if any (treated as "no boundary match").
        (void) htp_martp_process_aside(parser, 0);

        // Finalize the last part.
        const htp_status_t finalize_rc = htp_mpart_part_finalize_data(parser->current_part);
        if (finalize_rc != HTP_OK) return HTP_ERROR;

        // Ending abruptly is acceptable only inside the epilogue.
        const int ended_in_epilogue = (parser->current_part->type == MULTIPART_PART_EPILOGUE);
        if (!ended_in_epilogue) {
            parser->multipart.flags |= HTP_MULTIPART_INCOMPLETE;
        }
    }

    bstr_builder_clear(parser->boundary_pieces);

    return HTP_OK;
}
1098
1099
0
/**
 * Feeds one chunk of request body data to the multipart parser.
 *
 * The parser is a state machine whose state is kept in the parser structure
 * between calls, so the payload may be supplied in arbitrary pieces. Part
 * data is forwarded through parser->handle_data and boundaries are announced
 * through parser->handle_boundary. Input that may turn out to be a boundary
 * is stored in parser->boundary_pieces until that can be decided.
 *
 * @param[in] parser
 * @param[in] _data Input bytes.
 * @param[in] len Number of input bytes.
 * @return HTP_OK on success, HTP_ERROR if the parser was not fully
 *         initialized (still in STATE_INIT).
 */
htp_status_t htp_mpartp_parse(htp_mpartp_t *parser, const void *_data, size_t len) {
    unsigned char *data = (unsigned char *) _data;

    // The current position in the entire input buffer.
    size_t pos = 0;

    // The position of the first unprocessed byte of data. We split the
    // input buffer into smaller chunks, according to their purpose. Once
    // an entire such smaller chunk is processed, we move to the next
    // and update startpos.
    size_t startpos = 0;

    // The position of the (possible) boundary. We investigate for possible
    // boundaries whenever we encounter CRLF or just LF. If we don't find a
    // boundary we need to go back, and this is what data_return_pos helps with.
    size_t data_return_pos = 0;

    #if HTP_DEBUG
    fprint_raw_data(stderr, "htp_mpartp_parse: data chunk", data, len);
    #endif

    // While there's data in the input buffer.

    while (pos < len) {

STATE_SWITCH:
        #if HTP_DEBUG        
        fprintf(stderr, "htp_mpartp_parse: state %d pos %zd startpos %zd\n", parser->parser_state, pos, startpos);
        #endif

        switch (parser->parser_state) {

            case STATE_INIT:
                // Incomplete initialization.
                return HTP_ERROR;
                break;

            case STATE_DATA: // Handle part data.

                // While there's data in the input buffer.

                while (pos < len) {
                    // Check for a CRLF-terminated line.
                    if (data[pos] == CR) {
                        // We have a CR byte.

                        // Is this CR the last byte in the input buffer?
                        if (pos + 1 == len) {
                            // We have CR as the last byte in input. We are going to process
                            // what we have in the buffer as data, except for the CR byte,
                            // which we're going to leave for later. If it happens that a
                            // CR is followed by a LF and then a boundary, the CR is going
                            // to be discarded.
                            pos++; // Advance over CR.
                            parser->cr_aside = 1;
                        } else {
                            // We have CR and at least one more byte in the buffer, so we
                            // are able to test for the LF byte too.
                            if (data[pos + 1] == LF) {
                                pos += 2; // Advance over CR and LF.

                                parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE;

                                // Prepare to switch to boundary testing.
                                data_return_pos = pos;
                                parser->boundary_candidate_pos = pos - startpos;
                                parser->boundary_match_pos = 2; // After LF; position of the first dash.
                                parser->parser_state = STATE_BOUNDARY;

                                // STATE_BOUNDARY checks pos against len itself.
                                goto STATE_SWITCH;
                            } else {
                                // This is not a new line; advance over the
                                // byte and clear the CR set-aside flag.
                                pos++;
                                parser->cr_aside = 0;
                            }
                        }
                    } else if (data[pos] == LF) { // Check for a LF-terminated line.
                        pos++; // Advance over LF.

                        // Did we have a CR in the previous input chunk?
                        if (parser->cr_aside == 0) {
                            parser->multipart.flags |= HTP_MULTIPART_LF_LINE;
                        } else {
                            parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE;
                        }

                        // Prepare to switch to boundary testing.
                        data_return_pos = pos;
                        parser->boundary_candidate_pos = pos - startpos;
                        parser->boundary_match_pos = 2; // After LF; position of the first dash.
                        parser->parser_state = STATE_BOUNDARY;

                        // STATE_BOUNDARY checks pos against len itself.
                        goto STATE_SWITCH;
                    } else {
                        // Take one byte from input
                        pos++;

                        // Earlier we might have set aside a CR byte not knowing if the next
                        // byte is a LF. Now we know that it is not, and so we can release the CR.
                        if (parser->cr_aside) {
                            parser->handle_data(parser, (unsigned char *) &"\r", 1, /* not a line */ 0);
                            parser->cr_aside = 0;
                        }
                    }
                } // while               

                // No more data in the input buffer; process the data chunk.
                // A trailing CR that was set aside is excluded from the chunk.
                parser->handle_data(parser, data + startpos, pos - startpos - parser->cr_aside, /* not a line */ 0);

                break;

            case STATE_BOUNDARY: // Handle a possible boundary.
                while (pos < len) {
                    #ifdef HTP_DEBUG
                    fprintf(stderr, "boundary (len %zd pos %zd char %d) data char %d\n", parser->multipart.boundary_len,
                            parser->boundary_match_pos, parser->multipart.boundary[parser->boundary_match_pos], tolower(data[pos]));
                    #endif                   

                    // Check if the bytes match.
                    if (!(data[pos] == parser->multipart.boundary[parser->boundary_match_pos])) {
                        // Boundary mismatch.

                        // Process stored (buffered) data.
                        htp_martp_process_aside(parser, /* no match */ 0);

                        // Return back where data parsing left off.
                        if (parser->current_part_mode == MODE_LINE) {
                            // In line mode, we process the line.
                            parser->handle_data(parser, data + startpos, data_return_pos - startpos, /* line */ 1);
                            startpos = data_return_pos;
                        } else {
                            // In data mode, we go back where we left off.
                            pos = data_return_pos;
                        }

                        parser->parser_state = STATE_DATA;

                        // STATE_DATA checks pos against len itself.
                        goto STATE_SWITCH;
                    }

                    // Consume one matched boundary byte
                    pos++;
                    parser->boundary_match_pos++;

                    // Have we seen all boundary bytes?
                    if (parser->boundary_match_pos == parser->multipart.boundary_len) {
                        // Boundary match!

                        // Process stored (buffered) data.
                        htp_martp_process_aside(parser, /* boundary match */ 1);

                        // Process data prior to the boundary in the current input buffer.
                        // Because we know this is the last chunk before boundary, we can
                        // remove the line endings.
                        size_t dlen = data_return_pos - startpos;
                        if ((dlen > 0) && (data[startpos + dlen - 1] == LF)) dlen--;
                        if ((dlen > 0) && (data[startpos + dlen - 1] == CR)) dlen--;
                        parser->handle_data(parser, data + startpos, dlen, /* line */ 1);

                        // Keep track of how many boundaries we've seen.
                        parser->multipart.boundary_count++;

                        if (parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) {
                            parser->multipart.flags |= HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY;
                        }

                        // Run boundary match.
                        parser->handle_boundary(parser);

                        // We now need to check if this is the last boundary in the payload
                        parser->parser_state = STATE_BOUNDARY_IS_LAST2;

                        // The next state reads data[pos] without a bounds check of
                        // its own. If the boundary ended exactly at the end of this
                        // chunk there is nothing left to look at, so stop here; the
                        // saved state makes the next call continue from this point.
                        if (pos >= len) return HTP_OK;

                        goto STATE_SWITCH;
                    }
                } // while

                // No more data in the input buffer; store (buffer) the unprocessed
                // part for later, for after we find out if this is a boundary.
                bstr_builder_append_mem(parser->boundary_pieces, data + startpos, len - startpos);

                break;

            case STATE_BOUNDARY_IS_LAST2:
                // Examine the first byte after the last boundary character. If it is
                // a dash, then we maybe processing the last boundary in the payload. If
                // it is not, move to eat all bytes until the end of the line.

                if (data[pos] == '-') {
                    // Found one dash, now go to check the next position.
                    pos++;
                    parser->parser_state = STATE_BOUNDARY_IS_LAST1;
                } else {
                    // This is not the last boundary. Change state but
                    // do not advance the position, allowing the next
                    // state to process the byte.
                    parser->parser_state = STATE_BOUNDARY_EAT_LWS;
                }
                break;

            case STATE_BOUNDARY_IS_LAST1:
                // Examine the byte after the first dash; expected to be another dash.
                // If not, eat all bytes until the end of the line.

                if (data[pos] == '-') {
                    // This is indeed the last boundary in the payload.
                    pos++;
                    parser->multipart.flags |= HTP_MULTIPART_SEEN_LAST_BOUNDARY;
                    parser->parser_state = STATE_BOUNDARY_EAT_LWS;
                } else {
                    // The second character is not a dash, and so this is not
                    // the final boundary. Raise the flag for the first dash,
                    // and change state to consume the rest of the boundary line.
                    parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER;
                    parser->parser_state = STATE_BOUNDARY_EAT_LWS;
                }
                break;

            case STATE_BOUNDARY_EAT_LWS:
                if (data[pos] == CR) {
                    // CR byte, which could indicate a CRLF line ending.
                    pos++;
                    parser->parser_state = STATE_BOUNDARY_EAT_LWS_CR;
                } else if (data[pos] == LF) {
                    // LF line ending; we're done with boundary processing; data bytes follow.
                    pos++;
                    startpos = pos;
                    parser->multipart.flags |= HTP_MULTIPART_LF_LINE;
                    parser->parser_state = STATE_DATA;
                } else {
                    if (htp_is_lws(data[pos])) {
                        // Linear white space is allowed here.
                        parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_LWS_AFTER;
                        pos++;
                    } else {
                        // Unexpected byte; consume, but remain in the same state.
                        parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER;
                        pos++;
                    }
                }
                break;

            case STATE_BOUNDARY_EAT_LWS_CR:
                if (data[pos] == LF) {
                    // CRLF line ending; we're done with boundary processing; data bytes follow.
                    pos++;
                    startpos = pos;
                    parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE;
                    parser->parser_state = STATE_DATA;
                } else {
                    // Not a line ending; start again, but do not process this byte.
                    parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER;
                    parser->parser_state = STATE_BOUNDARY_EAT_LWS;
                }
                break;
        } // switch
    }

    return HTP_OK;
}
1359
1360
0
/**
 * Inspects a boundary value and raises flags for anything suspicious.
 *
 * RFC 1341:
 *
 *   The only mandatory parameter for the multipart Content-Type is the
 *   boundary parameter, which consists of 1 to 70 characters from a set of
 *   characters known to be very robust through email gateways, and NOT
 *   ending with white space. (If a boundary appears to end with white
 *   space, the white space must be presumed to have been added by a
 *   gateway, and should be deleted.) It is formally specified by the
 *   following BNF:
 *
 *   boundary := 0*69<bchars> bcharsnospace
 *
 *   bchars := bcharsnospace / " "
 *
 *   bcharsnospace :=    DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_"
 *                         / "," / "-" / "." / "/" / ":" / "=" / "?"
 *
 * Boundaries used by browsers:
 *
 *    Chrome: Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryT4AfwQCOgIxNVwlD
 *   Firefox: Content-Type: multipart/form-data; boundary=---------------------------21071316483088
 *      MSIE: Content-Type: multipart/form-data; boundary=---------------------------7dd13e11c0452
 *     Opera: Content-Type: multipart/form-data; boundary=----------2JL5oh7QWEDwyBllIRc7fh
 *    Safari: Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryre6zL3b0BelnTY5S
 *
 * @param[in] boundary Boundary to examine.
 * @param[in,out] flags HTP_MULTIPART_HBOUNDARY_INVALID is added for an empty
 *                or over-long boundary and for characters outside the RFC
 *                set; HTP_MULTIPART_HBOUNDARY_UNUSUAL is added for
 *                characters that are permitted but uncommon.
 */
static void htp_mpartp_validate_boundary(bstr *boundary, uint64_t *flags) {
    // Characters the RFC permits, but which are not commonly seen.
    static const char uncommon[] = "'()+_,./:=?";

    const unsigned char *bytes = bstr_ptr(boundary);
    const size_t count = bstr_len(boundary);

    // The RFC allows up to 70 characters. In real life,
    // boundaries tend to be shorter.
    if ((count == 0) || (count > 70)) {
        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
    }

    // Check boundary characters. This check is stricter than the
    // RFC, which seems to allow many separator characters.
    for (size_t k = 0; k < count; k++) {
        const unsigned char c = bytes[k];

        const int is_digit = (c >= '0') && (c <= '9');
        const int is_lower = (c >= 'a') && (c <= 'z');
        const int is_upper = (c >= 'A') && (c <= 'Z');

        // Digits, letters and dashes are what boundaries normally consist of.
        if (is_digit || is_lower || is_upper || (c == '-')) continue;

        // The terminating NUL of the table is excluded from the search.
        if (memchr(uncommon, c, sizeof (uncommon) - 1) != NULL) {
            *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
        } else {
            // Invalid character.
            *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
        }
    }
}
1434
1435
0
/**
 * Inspects a Content-Type header value for suspicious uses of the word
 * "boundary". The word is searched for repeatedly (the search helper is
 * expected to be case-insensitive, going by its name), and for every
 * occurrence that still has an equals character somewhere after it:
 *
 *  - the occurrence is counted, and
 *  - HTP_MULTIPART_HBOUNDARY_INVALID is raised if any of its 8 characters
 *    is not a lowercase ASCII letter (e.g. "Boundary", "BOUNDARY").
 *
 * HTP_MULTIPART_HBOUNDARY_INVALID is also raised when more than one such
 * occurrence is counted.
 *
 * @param[in] content_type Header value to inspect.
 * @param[in,out] flags Flags are only ever added (OR-ed in), never cleared.
 */
static void htp_mpartp_validate_content_type(bstr *content_type, uint64_t *flags) {
    // Length of the word "boundary".
    const size_t keyword_len = 8;

    unsigned char *cursor = bstr_ptr(content_type);
    size_t remaining = bstr_len(content_type);
    size_t occurrences = 0;

    while (remaining > 0) {
        int offset = bstr_util_mem_index_of_c_nocase(cursor, remaining, "boundary");
        if (offset == -1) break;

        // Jump to the start of the match.
        cursor += offset;
        remaining -= offset;

        // To work around the fact that WebKit actually uses the word
        // "boundary" inside its boundary value, we also require an
        // equals character to follow the word, for example:
        // "multipart/form-data; boundary=----WebKitFormBoundaryT4AfwQCOgIxNVwlD"
        if (memchr(cursor, '=', remaining) == NULL) break;

        occurrences++;

        // Check for case variations: every character of the matched
        // word must be a lowercase letter.
        for (size_t k = 0; k < keyword_len; k++) {
            if ((cursor[k] < 'a') || (cursor[k] > 'z')) {
                *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
            }
        }

        // Continue searching right after the matched word. A successful
        // match means at least keyword_len bytes remain.
        cursor += keyword_len;
        remaining -= keyword_len;
    }

    // Seeing the boundary parameter more than once is treated as invalid.
    if (occurrences > 1) {
        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
    }
}
1471
1472
0
/**
 * Locates the multipart boundary in a Content-Type header value, copies it
 * out, and records anything unusual or invalid about how it was supplied.
 *
 * The MIME type itself is not used to find the boundary; we simply look for
 * the word "boundary". This is more reliable in the face of evasion
 * techniques that rely on submitting invalid MIME types.
 *
 * @param[in] content_type Header value to examine.
 * @param[out] boundary On HTP_OK, receives a new bstr (created with
 *             bstr_dup_mem) holding the boundary value. It is set to NULL
 *             when the copy fails or when the boundary is empty; on the
 *             other non-OK returns it is left untouched.
 * @param[out] flags Reset to zero first, then populated with
 *             HTP_MULTIPART_HBOUNDARY_UNUSUAL and/or
 *             HTP_MULTIPART_HBOUNDARY_INVALID as appropriate.
 * @return HTP_OK when a non-empty boundary was extracted; HTP_DECLINED when
 *         no usable boundary is present; HTP_ERROR when an argument is NULL
 *         or the boundary could not be copied.
 */
htp_status_t htp_mpartp_find_boundary(bstr *content_type, bstr **boundary, uint64_t *flags) {
    if ((content_type == NULL) || (boundary == NULL) || (flags == NULL)) return HTP_ERROR;

    // Start with a clean slate.
    *flags = 0;

    // Find the word "boundary", ignoring case.
    int keyword_at = bstr_index_of_c_nocase(content_type, "boundary");
    if (keyword_at == -1) return HTP_DECLINED;

    // Everything that follows the 8-character keyword.
    unsigned char *rest = bstr_ptr(content_type) + keyword_at + 8;
    size_t rest_len = bstr_len(content_type) - keyword_at - 8;
    size_t cur = 0;

    // Walk up to the equals sign, judging whatever stands in between.
    for (; (cur < rest_len) && (rest[cur] != '='); cur++) {
        if (htp_is_space(rest[cur])) {
            // Whitespace before the equals sign is unusual.
            *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
        } else {
            // Anything else in this position may indicate evasion.
            *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
        }
    }

    if (cur >= rest_len) {
        // The header contains no equals sign after the keyword.
        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
        return HTP_DECLINED;
    }

    // Step over the '=' character.
    cur++;

    // Skip whitespace after the equals sign; its presence is unusual.
    for (; (cur < rest_len) && htp_is_space(rest[cur]); cur++) {
        *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
    }

    if (cur >= rest_len) {
        // Nothing follows the equals sign.
        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
        return HTP_DECLINED;
    }

    // Work out which span of the header holds the boundary value.
    size_t value_start = 0;
    size_t value_end = 0;

    if (rest[cur] == '"') {
        // Quoted boundary. Possibly not very unusual, but let's see.
        *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;

        // The value begins right after the opening double quote.
        value_start = ++cur;

        // Search for the closing double quote.
        while ((cur < rest_len) && (rest[cur] != '"')) cur++;

        if (cur >= rest_len) {
            // The closing double quote never showed up, so treat the
            // opening double quote as part of the boundary.
            *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
            value_start--;
        }

        value_end = cur;

        // Step over the closing double quote.
        cur++;
    } else {
        // Unquoted boundary.
        value_start = cur;

        // Find the end of the boundary. For the time being, we replicate
        // the behavior of PHP 5.4.x, which may give us a boundary closer to
        // what would be accepted in real life. The character checks that
        // follow will catch irregularities.
        while ((cur < rest_len) && (rest[cur] != ',') && (rest[cur] != ';') && (!htp_is_space(rest[cur]))) cur++;

        value_end = cur;
    }

    *boundary = bstr_dup_mem(rest + value_start, value_end - value_start);
    if (*boundary == NULL) return HTP_ERROR;

    // An empty boundary is of no use to anyone.
    if (bstr_len(*boundary) == 0) {
        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
        bstr_free(*boundary);
        *boundary = NULL;
        return HTP_DECLINED;
    }

    // Only whitespace is tolerated after the boundary value.
    int trailing_space = 0;
    int trailing_other = 0;

    for (; cur < rest_len; cur++) {
        if (htp_is_space(rest[cur])) {
            trailing_space = 1;
        } else {
            trailing_other = 1;
        }
    }

    // Any non-space character makes the header INVALID; trailing
    // whitespace on its own is merely UNUSUAL.
    if (trailing_other) {
        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
    } else if (trailing_space) {
        *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
    }

    #ifdef HTP_DEBUG
    fprint_bstr(stderr, "Multipart boundary", *boundary);
    #endif

    // Examine the characters that make up the boundary.
    htp_mpartp_validate_boundary(*boundary, flags);

    // Correlate with the MIME type. This might be a tad too sensitive
    // because it may catch non-browser clients with sloppy
    // implementations, but let's go with it for now.
    if (!bstr_begins_with_c(content_type, "multipart/form-data;")) {
        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
    }

    htp_mpartp_validate_content_type(content_type, flags);

    return HTP_OK;
}