Coverage Report

Created: 2025-07-23 07:29

/src/suricata7/libhtp/htp/htp_request_generic.c
Line
Count
Source (jump to first uncovered line)
1
/***************************************************************************
2
 * Copyright (c) 2009-2010 Open Information Security Foundation
3
 * Copyright (c) 2010-2013 Qualys, Inc.
4
 * All rights reserved.
5
 * 
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are
8
 * met:
9
 * 
10
 * - Redistributions of source code must retain the above copyright
11
 *   notice, this list of conditions and the following disclaimer.
12
13
 * - Redistributions in binary form must reproduce the above copyright
14
 *   notice, this list of conditions and the following disclaimer in the
15
 *   documentation and/or other materials provided with the distribution.
16
17
 * - Neither the name of the Qualys, Inc. nor the names of its
18
 *   contributors may be used to endorse or promote products derived from
19
 *   this software without specific prior written permission.
20
 * 
21
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
 ***************************************************************************/
33
34
/**
35
 * @file
36
 * @author Ivan Ristic <ivanr@webkreator.com>
37
 */
38
39
#include "htp_config_auto.h"
40
41
#include "htp_private.h"
42
43
/**
44
 * Extract one request header. A header can span multiple lines, in
45
 * which case they will be folded into one before parsing is attempted.
46
 *
47
 * @param[in] connp
48
 * @param[in] data
49
 * @param[in] len
50
 * @return HTP_OK or HTP_ERROR
51
 */
52
684k
htp_status_t htp_process_request_header_generic(htp_connp_t *connp, unsigned char *data, size_t len) {
53
    // Create a new header structure.
54
684k
    htp_header_t *h = calloc(1, sizeof (htp_header_t));
55
684k
    if (h == NULL) return HTP_ERROR;
56
57
    // Now try to parse the header.
58
684k
    if (htp_parse_request_header_generic(connp, h, data, len) != HTP_OK) {
59
0
        free(h);
60
0
        return HTP_ERROR;
61
0
    }
62
63
    #ifdef HTP_DEBUG
64
    fprint_bstr(stderr, "Header name", h->name);
65
    fprint_bstr(stderr, "Header value", h->value);
66
    #endif
67
68
    // Do we already have a header with the same name?
69
684k
    htp_header_t *h_existing = htp_table_get(connp->in_tx->request_headers, h->name);
70
684k
    if (h_existing != NULL) {
71
        // TODO Do we want to have a list of the headers that are
72
        //      allowed to be combined in this way?
73
467k
        if ((h_existing->flags & HTP_FIELD_REPEATED) == 0) {
74
            // This is the second occurence for this header.
75
54.4k
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Repetition for header");
76
413k
        } else {
77
            // For simplicity reasons, we count the repetitions of all headers
78
413k
            if (connp->in_tx->req_header_repetitions < HTP_MAX_HEADERS_REPETITIONS) {
79
178k
                connp->in_tx->req_header_repetitions++;
80
234k
            } else {
81
234k
                bstr_free(h->name);
82
234k
                bstr_free(h->value);
83
234k
                free(h);
84
234k
                return HTP_OK;
85
234k
            }
86
413k
        }
87
        // Keep track of repeated same-name headers.
88
233k
        h_existing->flags |= HTP_FIELD_REPEATED;
89
90
        // Having multiple C-L headers is against the RFC but
91
        // servers may ignore the subsequent headers if the values are the same.
92
233k
        if (bstr_cmp_c_nocase(h->name, "Content-Length") == 0) {
93
            // Don't use string comparison here because we want to
94
            // ignore small formatting differences.
95
96
23.7k
            int64_t existing_cl = htp_parse_content_length(h_existing->value, NULL);
97
23.7k
            int64_t new_cl = htp_parse_content_length(h->value, NULL);
98
            // Ambiguous response C-L value.
99
23.7k
            if ((existing_cl == -1) || (new_cl == -1) || (existing_cl != new_cl)) {
100
8.77k
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Ambiguous request C-L value");
101
8.77k
            }
102
            // Ignoring the new C-L header that has the same value as the previous ones.
103
209k
        } else {
104
            // Add to the existing header.
105
209k
            bstr *new_value = bstr_expand(h_existing->value, bstr_len(h_existing->value) + 2 + bstr_len(h->value));
106
209k
            if (new_value == NULL) {
107
0
                bstr_free(h->name);
108
0
                bstr_free(h->value);
109
0
                free(h);
110
0
                return HTP_ERROR;
111
0
            }
112
113
209k
            h_existing->value = new_value;
114
209k
            bstr_add_mem_noex(h_existing->value, ", ", 2);
115
209k
            bstr_add_noex(h_existing->value, h->value);
116
209k
        }
117
118
        // The new header structure is no longer needed.
119
233k
        bstr_free(h->name);
120
233k
        bstr_free(h->value);
121
233k
        free(h);
122
233k
    } else {
123
216k
        if (htp_table_size(connp->in_tx->request_headers) > connp->cfg->number_headers_limit) {
124
0
            if (!(connp->in_tx->flags & HTP_HEADERS_TOO_MANY)) {
125
0
                connp->in_tx->flags |= HTP_HEADERS_TOO_MANY;
126
0
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Too many request headers");
127
0
            }
128
0
            bstr_free(h->name);
129
0
            bstr_free(h->value);
130
0
            free(h);
131
            // give up on what comes next
132
0
            return HTP_ERROR;
133
0
        }
134
        // Add as a new header.
135
216k
        if (htp_table_add(connp->in_tx->request_headers, h->name, h) != HTP_OK) {
136
0
            bstr_free(h->name);
137
0
            bstr_free(h->value);
138
0
            free(h);
139
0
        }
140
216k
    }
141
142
449k
    return HTP_OK;
143
684k
}
144
145
/**
146
 * Generic request header parser.
147
 *
148
 * @param[in] connp
149
 * @param[in] h
150
 * @param[in] data
151
 * @param[in] len
152
 * @return HTP_OK or HTP_ERROR
153
 */
154
684k
htp_status_t htp_parse_request_header_generic(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len) {
155
684k
    size_t name_start, name_end;
156
684k
    size_t value_start, value_end;
157
158
684k
    htp_chomp(data, &len);
159
160
684k
    name_start = 0;
161
162
    // Look for the colon.
163
684k
    size_t colon_pos = 0;
164
9.33M
    while ((colon_pos < len) && (data[colon_pos] != '\0') && (data[colon_pos] != ':')) colon_pos++;
165
166
684k
    if ((colon_pos == len) || (data[colon_pos] == '\0')) {
167
        // Missing colon.
168
169
316k
        h->flags |= HTP_FIELD_UNPARSEABLE;
170
171
        // Log only once per transaction.
172
316k
        if (!(connp->in_tx->flags & HTP_FIELD_UNPARSEABLE)) {
173
69.5k
            connp->in_tx->flags |= HTP_FIELD_UNPARSEABLE;
174
69.5k
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: colon missing");
175
69.5k
        }
176
177
        // We handle this case as a header with an empty name, with the value equal
178
        // to the entire input string.
179
180
        // TODO Apache will respond to this problem with a 400.
181
182
        // Now extract the name and the value
183
316k
        h->name = bstr_dup_c("");
184
316k
        if (h->name == NULL) return HTP_ERROR;
185
186
        // Ignore LWS after field-content.
187
316k
        value_end = len - 1;
188
346k
        while ((value_end > 0) && (htp_is_lws(data[value_end]))) {
189
29.2k
            value_end--;
190
29.2k
        }
191
316k
        h->value = bstr_dup_mem(data, value_end + 1);
192
316k
        if (h->value == NULL) {
193
0
            bstr_free(h->name);
194
0
            return HTP_ERROR;
195
0
        }
196
197
316k
        return HTP_OK;
198
316k
    }
199
200
367k
    if (colon_pos == 0) {
201
        // Empty header name.
202
203
11.4k
        h->flags |= HTP_FIELD_INVALID;
204
205
        // Log only once per transaction.
206
11.4k
        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
207
4.98k
            connp->in_tx->flags |= HTP_FIELD_INVALID;
208
4.98k
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: empty name");
209
4.98k
        }
210
11.4k
    }
211
212
367k
    name_end = colon_pos;
213
214
    // Ignore LWS after field-name.
215
367k
    size_t prev = name_end;
216
389k
    while ((prev > name_start) && (htp_is_lws(data[prev - 1]))) {
217
        // LWS after header name.
218
219
22.0k
        prev--;
220
22.0k
        name_end--;
221
222
22.0k
        h->flags |= HTP_FIELD_INVALID;
223
224
        // Log only once per transaction.
225
22.0k
        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
226
7.80k
            connp->in_tx->flags |= HTP_FIELD_INVALID;
227
7.80k
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: LWS after name");
228
7.80k
        }
229
22.0k
    }
230
231
    // Header value.
232
233
367k
    value_start = colon_pos;
234
235
    // Go over the colon.
236
367k
    if (value_start < len) {
237
367k
        value_start++;
238
367k
    }
239
240
    // Ignore LWS before field-content.
241
651k
    while ((value_start < len) && (htp_is_lws(data[value_start]))) {
242
283k
        value_start++;
243
283k
    }
244
245
    // Look for the end of field-content.
246
367k
    value_end = len;
247
248
    // Ignore LWS after field-content.
249
367k
    prev = value_end - 1;
250
377k
    while ((prev > value_start) && (htp_is_lws(data[prev]))) {
251
9.57k
        prev--;
252
9.57k
        value_end--;
253
9.57k
    }
254
255
    // Check that the header name is a token.
256
367k
    size_t i = name_start;
257
2.52M
    while (i < name_end) {
258
2.35M
        if (!htp_is_token(data[i])) {
259
            // Incorrectly formed header name.
260
261
204k
            h->flags |= HTP_FIELD_INVALID;
262
263
            // Log only once per transaction.
264
204k
            if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
265
20.5k
                connp->in_tx->flags |= HTP_FIELD_INVALID;
266
20.5k
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request header name is not a token");
267
20.5k
            }
268
269
204k
            break;
270
204k
        }
271
272
2.15M
        i++;
273
2.15M
    }
274
275
    // Now extract the name and the value
276
367k
    h->name = bstr_dup_mem(data + name_start, name_end - name_start);
277
367k
    if (h->name == NULL) return HTP_ERROR;
278
279
367k
    h->value = bstr_dup_mem(data + value_start, value_end - value_start);
280
367k
    if (h->value == NULL) {
281
0
        bstr_free(h->name);
282
0
        return HTP_ERROR;
283
0
    }
284
285
367k
    return HTP_OK;
286
367k
}
287
288
/**
289
 * Generic request line parser.
290
 *
291
 * @param[in] connp
292
 * @return HTP_OK or HTP_ERROR
293
 */
294
187k
htp_status_t htp_parse_request_line_generic(htp_connp_t *connp) {
295
187k
    return htp_parse_request_line_generic_ex(connp, 0 /* NUL does not terminates line */);
296
187k
}
297
298
187k
/**
 * Generic request line parser with optional NUL termination. Splits the
 * request line of the current inbound transaction into method, URI and
 * protocol, storing copies in the transaction. A line that ends after the
 * method or after the URI is recorded as an HTTP/0.9 request.
 *
 * @param[in] connp
 * @param[in] nul_terminates When non-zero, the line is considered to end
 *                           at the first NUL byte.
 * @return HTP_OK, or HTP_ERROR on allocation failure
 */
htp_status_t htp_parse_request_line_generic_ex(htp_connp_t *connp, int nul_terminates) {
    htp_tx_t *tx = connp->in_tx;
    unsigned char *data = bstr_ptr(tx->request_line);
    size_t len = bstr_len(tx->request_line);
    size_t cursor = 0;
    size_t method_start = 0;
    size_t uri_start;
    size_t odd_delim;

    if (nul_terminates) {
        // The line ends with the first NUL byte; shrink len accordingly.
        size_t nul_at = 0;
        while ((nul_at < len) && (data[nul_at] != '\0')) {
            nul_at++;
        }

        len = nul_at;
    }

    // Skip past leading whitespace. IIS allows this.
    while ((cursor < len) && htp_is_space(data[cursor])) cursor++;

    if (cursor != 0) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: leading whitespace");

        if (connp->cfg->requestline_leading_whitespace_unwanted == HTP_UNWANTED_IGNORE) {
            // The method begins after the whitespace.
            method_start = cursor;
        } else {
            // Leave method_start at 0 so that the whitespace is copied into
            // the method, and set the expected response code to this anomaly.
            tx->response_status_expected_number = connp->cfg->requestline_leading_whitespace_unwanted;
        }
    }

    // The request method runs up to the first whitespace character.
    while ((cursor < len) && (!htp_is_space(data[cursor]))) cursor++;

    // An empty method is accepted.
    tx->request_method = bstr_dup_mem(data + method_start, cursor - method_start);
    if (tx->request_method == NULL) return HTP_ERROR;

    #ifdef HTP_DEBUG
    fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_method), bstr_len(tx->request_method));
    #endif

    tx->request_method_number = htp_convert_method_to_number(tx->request_method);

    // Ignore whitespace after request method. The RFC allows
    // for only one SP, but then suggests any number of SP and HT
    // should be permitted. Apache uses isspace(), which is even
    // more permitting, so that's what we use here. Anything other
    // than SP is remembered as a non-compliant delimiter.
    odd_delim = 0;
    while ((cursor < len) && (isspace(data[cursor]))) {
        if (data[cursor] != 0x20) {
            odd_delim = 1;
        }
        cursor++;
    }
// Too much performance overhead for fuzzing
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    if (odd_delim) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: non-compliant delimiter between Method and URI");
    }
#endif

    // Is there anything after the request method?
    if (cursor == len) {
        // No, this looks like a HTTP/0.9 request.
        tx->is_protocol_0_9 = 1;
        tx->request_protocol_number = HTP_PROTOCOL_0_9;
        if (tx->request_method_number == HTP_M_UNKNOWN) {
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method only");
        }

        return HTP_OK;
    }

    uri_start = cursor;
    odd_delim = 0;

    if (tx->connp->cfg->allow_space_uri) {
        // Scan backwards: the URI may itself contain spaces.
        cursor = len - 1;

        // Skips the spaces at the end of line (after protocol)
        while ((cursor > uri_start) && htp_is_space(data[cursor])) cursor--;

        // The URI ends with the last SP.
        while ((cursor > uri_start) && (data[cursor] != 0x20)) {
            if (!odd_delim && htp_is_space(data[cursor])) {
                odd_delim = 1;
            }
            cursor--;
        }

        /* if we've seen some 'bad' delimiters, we retry with those */
        if (odd_delim && (cursor == uri_start)) {
            // special case: even though RFC's allow only SP (0x20), many
            // implementations allow other delimiters, like tab or other
            // characters that isspace() accepts.
            cursor = len - 1;
            while ((cursor > uri_start) && (!htp_is_space(data[cursor]))) cursor--;
        } else {
            // Discard any bad delimiter seen in the protocol part and
            // look for one inside the URI itself instead.
            odd_delim = 0;
            for (size_t i = uri_start; i < cursor; i++) {
                if ((data[i] != 0x20) && htp_is_space(data[i])) {
                    odd_delim = 1;
                    break;
                }
            }
        }

        if (odd_delim) {
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
            // warn regardless if we've seen non-compliant chars
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter");
#endif
        } else if (cursor == uri_start) {
            // No delimiter at all: the URI takes the rest of the line.
            cursor = len;
        }
    } else {
        // The URI ends with the first SP.
        while ((cursor < len) && (data[cursor] != 0x20)) {
            if (!odd_delim && htp_is_space(data[cursor])) {
                odd_delim = 1;
            }
            cursor++;
        }

        /* if we've seen some 'bad' delimiters, we retry with those */
        if (odd_delim && (cursor == len)) {
            // special case: even though RFC's allow only SP (0x20), many
            // implementations allow other delimiters, like tab or other
            // characters that isspace() accepts.
            cursor = uri_start;
            while ((cursor < len) && (!htp_is_space(data[cursor]))) cursor++;
        }
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
        if (odd_delim) {
            // warn regardless if we've seen non-compliant chars
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter");
        }
#endif
    }

    tx->request_uri = bstr_dup_mem(data + uri_start, cursor - uri_start);
    if (tx->request_uri == NULL) return HTP_ERROR;

    #ifdef HTP_DEBUG
    fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_uri), bstr_len(tx->request_uri));
    #endif

    // Ignore whitespace after URI.
    while ((cursor < len) && (htp_is_space(data[cursor]))) cursor++;

    // Is there protocol information available?
    if (cursor == len) {
        // No, this looks like a HTTP/0.9 request.
        tx->is_protocol_0_9 = 1;
        tx->request_protocol_number = HTP_PROTOCOL_0_9;
        if (tx->request_method_number == HTP_M_UNKNOWN) {
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and no protocol");
        }

        return HTP_OK;
    }

    // The protocol information continues until the end of the line.
    tx->request_protocol = bstr_dup_mem(data + cursor, len - cursor);
    if (tx->request_protocol == NULL) return HTP_ERROR;

    tx->request_protocol_number = htp_parse_protocol(tx->request_protocol);
    if ((tx->request_method_number == HTP_M_UNKNOWN) && (tx->request_protocol_number == HTP_PROTOCOL_INVALID)) {
        htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and invalid protocol");
    }

    #ifdef HTP_DEBUG
    fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_protocol), bstr_len(tx->request_protocol));
    #endif

    return HTP_OK;
}
478