/src/suricata7/libhtp/htp/htp_request_generic.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*************************************************************************** |
2 | | * Copyright (c) 2009-2010 Open Information Security Foundation |
3 | | * Copyright (c) 2010-2013 Qualys, Inc. |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions are |
8 | | * met: |
9 | | * |
10 | | * - Redistributions of source code must retain the above copyright |
11 | | * notice, this list of conditions and the following disclaimer. |
12 | | |
13 | | * - Redistributions in binary form must reproduce the above copyright |
14 | | * notice, this list of conditions and the following disclaimer in the |
15 | | * documentation and/or other materials provided with the distribution. |
16 | | |
17 | | * - Neither the name of the Qualys, Inc. nor the names of its |
18 | | * contributors may be used to endorse or promote products derived from |
19 | | * this software without specific prior written permission. |
20 | | * |
21 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
22 | | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
23 | | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
24 | | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
25 | | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
26 | | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
27 | | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
28 | | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
29 | | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
30 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
31 | | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
32 | | ***************************************************************************/ |
33 | | |
34 | | /** |
35 | | * @file |
36 | | * @author Ivan Ristic <ivanr@webkreator.com> |
37 | | */ |
38 | | |
39 | | #include "htp_config_auto.h" |
40 | | |
41 | | #include "htp_private.h" |
42 | | |
43 | | /** |
44 | | * Extract one request header and merge it into the transaction's request_headers table. |
45 | | * Multi-line headers are presumably folded by the caller; no folding happens here. |
46 | | * |
47 | | * @param[in] connp connection parser; the header is stored on connp->in_tx |
48 | | * @param[in] data raw header bytes, passed on to htp_parse_request_header_generic() |
49 | | * @param[in] len number of bytes in data |
50 | | * @return HTP_OK (also when a repeated header is dropped), or HTP_ERROR on parse/allocation failure or when the header-count limit is exceeded |
51 | | */ |
52 | 684k | htp_status_t htp_process_request_header_generic(htp_connp_t *connp, unsigned char *data, size_t len) { |
53 | | // Zero-initialised header structure; the parser below fills in name, value and flags. |
54 | 684k | htp_header_t *h = calloc(1, sizeof (htp_header_t)); |
55 | 684k | if (h == NULL) return HTP_ERROR; |
56 | | |
57 | | // Now try to parse the header; on failure only the structure itself is released here. |
58 | 684k | if (htp_parse_request_header_generic(connp, h, data, len) != HTP_OK) { |
59 | 0 | free(h); |
60 | 0 | return HTP_ERROR; |
61 | 0 | } |
62 | | |
63 | | #ifdef HTP_DEBUG |
64 | | fprint_bstr(stderr, "Header name", h->name); |
65 | | fprint_bstr(stderr, "Header value", h->value); |
66 | | #endif |
67 | | |
68 | | // Do we already have a header with the same name? |
69 | 684k | htp_header_t *h_existing = htp_table_get(connp->in_tx->request_headers, h->name); |
70 | 684k | if (h_existing != NULL) { |
71 | | // TODO Do we want to have a list of the headers that are |
72 | | // allowed to be combined in this way? |
73 | 467k | if ((h_existing->flags & HTP_FIELD_REPEATED) == 0) { |
74 | | // This is the second occurrence of this header; warn once per header name. |
75 | 54.4k | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Repetition for header"); |
76 | 413k | } else { |
77 | | // Third or later occurrence. For simplicity one counter covers all header names; once it reaches HTP_MAX_HEADERS_REPETITIONS the new header is dropped and HTP_OK is returned. |
78 | 413k | if (connp->in_tx->req_header_repetitions < HTP_MAX_HEADERS_REPETITIONS) { |
79 | 178k | connp->in_tx->req_header_repetitions++; |
80 | 234k | } else { |
81 | 234k | bstr_free(h->name); |
82 | 234k | bstr_free(h->value); |
83 | 234k | free(h); |
84 | 234k | return HTP_OK; |
85 | 234k | } |
86 | 413k | } |
87 | | // Keep track of repeated same-name headers (this flag selects the branch above next time). |
88 | 233k | h_existing->flags |= HTP_FIELD_REPEATED; |
89 | | |
90 | | // Having multiple C-L headers is against the RFC but |
91 | | // servers may ignore the subsequent headers if the values are the same. |
92 | 233k | if (bstr_cmp_c_nocase(h->name, "Content-Length") == 0) { |
93 | | // Don't use string comparison here because we want to |
94 | | // ignore small formatting differences. |
95 | | |
96 | 23.7k | int64_t existing_cl = htp_parse_content_length(h_existing->value, NULL); |
97 | 23.7k | int64_t new_cl = htp_parse_content_length(h->value, NULL); |
98 | | // Ambiguous request C-L value: either parse returned -1 (presumably a parse failure) or the two values differ. |
99 | 23.7k | if ((existing_cl == -1) || (new_cl == -1) || (existing_cl != new_cl)) { |
100 | 8.77k | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Ambiguous request C-L value"); |
101 | 8.77k | } |
102 | | // The new C-L header is never appended, whether or not it matched; the existing value is kept. |
103 | 209k | } else { |
104 | | // Add to the existing header as "<existing>, <new>": grow by 2 + new length first, then append. |
105 | 209k | bstr *new_value = bstr_expand(h_existing->value, bstr_len(h_existing->value) + 2 + bstr_len(h->value)); |
106 | 209k | if (new_value == NULL) { |
107 | 0 | bstr_free(h->name); |
108 | 0 | bstr_free(h->value); |
109 | 0 | free(h); |
110 | 0 | return HTP_ERROR; |
111 | 0 | } |
112 | | |
113 | 209k | h_existing->value = new_value; |
114 | 209k | bstr_add_mem_noex(h_existing->value, ", ", 2); |
115 | 209k | bstr_add_noex(h_existing->value, h->value); |
116 | 209k | } |
117 | | |
118 | | // The new header structure is no longer needed; its content now lives in h_existing (or was ignored). |
119 | 233k | bstr_free(h->name); |
120 | 233k | bstr_free(h->value); |
121 | 233k | free(h); |
122 | 233k | } else { |
123 | 216k | if (htp_table_size(connp->in_tx->request_headers) > connp->cfg->number_headers_limit) { |
124 | 0 | if (!(connp->in_tx->flags & HTP_HEADERS_TOO_MANY)) { |
125 | 0 | connp->in_tx->flags |= HTP_HEADERS_TOO_MANY; |
126 | 0 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Too many request headers"); |
127 | 0 | } |
128 | 0 | bstr_free(h->name); |
129 | 0 | bstr_free(h->value); |
130 | 0 | free(h); |
131 | | // Give up on what comes next: the limit was exceeded, so fail instead of storing more headers. |
132 | 0 | return HTP_ERROR; |
133 | 0 | } |
134 | | // Add as a new header. NOTE(review): if htp_table_add() fails the header is freed, but HTP_OK is still returned below - confirm this is intended. |
135 | 216k | if (htp_table_add(connp->in_tx->request_headers, h->name, h) != HTP_OK) { |
136 | 0 | bstr_free(h->name); |
137 | 0 | bstr_free(h->value); |
138 | 0 | free(h); |
139 | 0 | } |
140 | 216k | } |
141 | | |
142 | 449k | return HTP_OK; |
143 | 684k | } |
144 | | |
145 | | /** |
146 | | * Generic request header parser: splits one header line into h->name and h->value. |
147 | | * Anomalies set bits in h->flags; each anomaly flag is logged at most once per transaction. |
148 | | * @param[in] connp connection parser; connp->in_tx->flags is updated and used to suppress repeated logs |
149 | | * @param[in,out] h header to fill; on HTP_OK h->name and h->value are newly allocated strings |
150 | | * @param[in] data header line bytes; htp_chomp() is applied first (presumably stripping the line terminator) |
151 | | * @param[in] len number of bytes in data |
152 | | * @return HTP_OK, or HTP_ERROR if allocating the name or the value fails (no name is left allocated in that case) |
153 | | */ |
154 | 684k | htp_status_t htp_parse_request_header_generic(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len) { |
155 | 684k | size_t name_start, name_end; |
156 | 684k | size_t value_start, value_end; |
157 | | |
158 | 684k | htp_chomp(data, &len); |
159 | | |
160 | 684k | name_start = 0; |
161 | | |
162 | | // Look for the colon; an embedded NUL byte also stops the search. |
163 | 684k | size_t colon_pos = 0; |
164 | 9.33M | while ((colon_pos < len) && (data[colon_pos] != '\0') && (data[colon_pos] != ':')) colon_pos++; |
165 | | |
166 | 684k | if ((colon_pos == len) || (data[colon_pos] == '\0')) { |
167 | | // Missing colon (end of data or a NUL was reached before any colon). |
168 | | |
169 | 316k | h->flags |= HTP_FIELD_UNPARSEABLE; |
170 | | |
171 | | // Log only once per transaction. |
172 | 316k | if (!(connp->in_tx->flags & HTP_FIELD_UNPARSEABLE)) { |
173 | 69.5k | connp->in_tx->flags |= HTP_FIELD_UNPARSEABLE; |
174 | 69.5k | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: colon missing"); |
175 | 69.5k | } |
176 | | |
177 | | // We handle this case as a header with an empty name, with the value equal |
178 | | // to the entire input string (minus trailing LWS, including any bytes after a NUL). |
179 | | |
180 | | // TODO Apache will respond to this problem with a 400. |
181 | | |
182 | | // Now extract the name and the value. |
183 | 316k | h->name = bstr_dup_c(""); |
184 | 316k | if (h->name == NULL) return HTP_ERROR; |
185 | | |
186 | | // Ignore LWS after field-content. NOTE(review): len - 1 wraps around when len == 0, making data[value_end] an out-of-bounds read - confirm callers never pass an empty line. |
187 | 316k | value_end = len - 1; |
188 | 346k | while ((value_end > 0) && (htp_is_lws(data[value_end]))) { |
189 | 29.2k | value_end--; |
190 | 29.2k | } |
191 | 316k | h->value = bstr_dup_mem(data, value_end + 1); |
192 | 316k | if (h->value == NULL) { |
193 | 0 | bstr_free(h->name); |
194 | 0 | return HTP_ERROR; |
195 | 0 | } |
196 | | |
197 | 316k | return HTP_OK; |
198 | 316k | } |
199 | | |
200 | 367k | if (colon_pos == 0) { |
201 | | // Empty header name: flagged and logged, but parsing continues with a zero-length name. |
202 | | |
203 | 11.4k | h->flags |= HTP_FIELD_INVALID; |
204 | | |
205 | | // Log only once per transaction. |
206 | 11.4k | if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) { |
207 | 4.98k | connp->in_tx->flags |= HTP_FIELD_INVALID; |
208 | 4.98k | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: empty name"); |
209 | 4.98k | } |
210 | 11.4k | } |
211 | | |
212 | 367k | name_end = colon_pos; |
213 | | |
214 | | // Ignore LWS after field-name by moving name_end back over it. |
215 | 367k | size_t prev = name_end; |
216 | 389k | while ((prev > name_start) && (htp_is_lws(data[prev - 1]))) { |
217 | | // LWS after header name: trimmed from the name, but the header is flagged as invalid. |
218 | | |
219 | 22.0k | prev--; |
220 | 22.0k | name_end--; |
221 | | |
222 | 22.0k | h->flags |= HTP_FIELD_INVALID; |
223 | | |
224 | | // Log only once per transaction (the same transaction flag is shared by all HTP_FIELD_INVALID cases). |
225 | 22.0k | if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) { |
226 | 7.80k | connp->in_tx->flags |= HTP_FIELD_INVALID; |
227 | 7.80k | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: LWS after name"); |
228 | 7.80k | } |
229 | 22.0k | } |
230 | | |
231 | | // Header value. |
232 | | |
233 | 367k | value_start = colon_pos; |
234 | | |
235 | | // Go over the colon (colon_pos < len always holds on this path, the other case returned above). |
236 | 367k | if (value_start < len) { |
237 | 367k | value_start++; |
238 | 367k | } |
239 | | |
240 | | // Ignore LWS before field-content. |
241 | 651k | while ((value_start < len) && (htp_is_lws(data[value_start]))) { |
242 | 283k | value_start++; |
243 | 283k | } |
244 | | |
245 | | // Look for the end of field-content. |
246 | 367k | value_end = len; |
247 | | |
248 | | // Ignore LWS after field-content; len >= 1 here (a colon was found), so value_end - 1 cannot wrap. |
249 | 367k | prev = value_end - 1; |
250 | 377k | while ((prev > value_start) && (htp_is_lws(data[prev]))) { |
251 | 9.57k | prev--; |
252 | 9.57k | value_end--; |
253 | 9.57k | } |
254 | | |
255 | | // Check that the header name is a token; a bad name is only flagged, it is still stored as-is below. |
256 | 367k | size_t i = name_start; |
257 | 2.52M | while (i < name_end) { |
258 | 2.35M | if (!htp_is_token(data[i])) { |
259 | | // Incorrectly formed header name; stop at the first offending byte. |
260 | | |
261 | 204k | h->flags |= HTP_FIELD_INVALID; |
262 | | |
263 | | // Log only once per transaction. |
264 | 204k | if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) { |
265 | 20.5k | connp->in_tx->flags |= HTP_FIELD_INVALID; |
266 | 20.5k | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request header name is not a token"); |
267 | 20.5k | } |
268 | | |
269 | 204k | break; |
270 | 204k | } |
271 | | |
272 | 2.15M | i++; |
273 | 2.15M | } |
274 | | |
275 | | // Now extract the name and the value as copies of [name_start, name_end) and [value_start, value_end). |
276 | 367k | h->name = bstr_dup_mem(data + name_start, name_end - name_start); |
277 | 367k | if (h->name == NULL) return HTP_ERROR; |
278 | | |
279 | 367k | h->value = bstr_dup_mem(data + value_start, value_end - value_start); |
280 | 367k | if (h->value == NULL) { |
281 | 0 | bstr_free(h->name); |
282 | 0 | return HTP_ERROR; |
283 | 0 | } |
284 | | |
285 | 367k | return HTP_OK; |
286 | 367k | } |
287 | | |
288 | | /** |
289 | | * Generic request line parser: delegates to htp_parse_request_line_generic_ex() with NUL termination disabled. |
290 | | * |
291 | | * @param[in] connp connection parser whose in_tx->request_line is parsed |
292 | | * @return HTP_OK or HTP_ERROR, as returned by htp_parse_request_line_generic_ex() |
293 | | */ |
294 | 187k | htp_status_t htp_parse_request_line_generic(htp_connp_t *connp) { |
295 | 187k | return htp_parse_request_line_generic_ex(connp, 0 /* NUL does not terminate the line */); |
296 | 187k | } |
297 | | |
298 | 187k | htp_status_t htp_parse_request_line_generic_ex(htp_connp_t *connp, int nul_terminates) { /* Splits connp->in_tx->request_line into request_method, request_uri and request_protocol; a non-zero nul_terminates cuts the line at the first NUL byte. Returns HTP_OK, or HTP_ERROR if a string copy cannot be allocated. */ |
299 | 187k | htp_tx_t *tx = connp->in_tx; |
300 | 187k | unsigned char *data = bstr_ptr(tx->request_line); |
301 | 187k | size_t len = bstr_len(tx->request_line); |
302 | 187k | size_t pos = 0; |
303 | 187k | size_t mstart = 0; |
304 | 187k | size_t start; |
305 | 187k | size_t bad_delim; |
306 | | |
307 | 187k | if (nul_terminates) { |
308 | | // The line ends with the first NUL byte; count the bytes that precede it. |
309 | | |
310 | 0 | size_t newlen = 0; |
311 | 0 | while ((pos < len) && (data[pos] != '\0')) { |
312 | 0 | pos++; |
313 | 0 | newlen++; |
314 | 0 | } |
315 | | |
316 | | // Start again from the beginning, with the new (possibly shorter) length. |
317 | 0 | len = newlen; |
318 | 0 | pos = 0; |
319 | 0 | } |
320 | | |
321 | | // Skip past leading whitespace. IIS allows this. |
322 | 308k | while ((pos < len) && htp_is_space(data[pos])) pos++; |
323 | 187k | if (pos) { |
324 | 32.9k | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: leading whitespace"); |
325 | 32.9k | mstart = pos; |
326 | | |
327 | 32.9k | if (connp->cfg->requestline_leading_whitespace_unwanted != HTP_UNWANTED_IGNORE) { |
328 | | // Reset mstart so that we copy the whitespace into the method. |
329 | 0 | mstart = 0; |
330 | | // Set the expected response code to the value configured for this anomaly. |
331 | 0 | tx->response_status_expected_number = connp->cfg->requestline_leading_whitespace_unwanted; |
332 | 0 | } |
333 | 32.9k | } |
334 | | |
335 | | // The request method starts at mstart (after any skipped leading whitespace) |
336 | | // and ends with the first whitespace character. |
337 | 2.58M | while ((pos < len) && (!htp_is_space(data[pos]))) pos++; |
338 | | |
339 | | // No, we don't care if the method is empty. |
340 | | |
341 | 187k | tx->request_method = bstr_dup_mem(data + mstart, pos - mstart); |
342 | 187k | if (tx->request_method == NULL) return HTP_ERROR; |
343 | | |
344 | | #ifdef HTP_DEBUG |
345 | | fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_method), bstr_len(tx->request_method)); |
346 | | #endif |
347 | | |
348 | 187k | tx->request_method_number = htp_convert_method_to_number(tx->request_method); |
349 | | |
350 | 187k | bad_delim = 0; |
351 | | // Ignore whitespace after request method. The RFC allows |
352 | | // for only one SP, but then suggests any number of SP and HT |
353 | | // should be permitted. Apache uses isspace(), which is even |
354 | | // more permitting, so that's what we use here. Anything other than SP (0x20) sets bad_delim. |
355 | 449k | while ((pos < len) && (isspace(data[pos]))) { |
356 | 262k | if (!bad_delim && data[pos] != 0x20) { |
357 | 35.8k | bad_delim++; |
358 | 35.8k | } |
359 | 262k | pos++; |
360 | 262k | } |
361 | | // The warning below is compiled out in fuzzing builds: too much performance overhead for fuzzing. |
362 | | #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
363 | | if (bad_delim) { |
364 | | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: non-compliant delimiter between Method and URI"); |
365 | | } |
366 | | #endif |
367 | | |
368 | | // Is there anything after the request method? |
369 | 187k | if (pos == len) { |
370 | | // No, this looks like a HTTP/0.9 request; request_uri and request_protocol are not set here. |
371 | | |
372 | 68.9k | tx->is_protocol_0_9 = 1; |
373 | 68.9k | tx->request_protocol_number = HTP_PROTOCOL_0_9; |
374 | 68.9k | if (tx->request_method_number == HTP_M_UNKNOWN) |
375 | 67.7k | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method only"); |
376 | | |
377 | 68.9k | return HTP_OK; |
378 | 68.9k | } |
379 | | |
380 | 118k | start = pos; |
381 | 118k | bad_delim = 0; |
382 | 118k | if (tx->connp->cfg->allow_space_uri) { |
383 | 0 | pos = len - 1; |
384 | | // Skip the spaces at the end of the line (after the protocol). |
385 | 0 | while (pos > start && htp_is_space(data[pos])) pos--; |
386 | | // Scan backwards: the URI ends at the last SP (0x20) that precedes the final word; other whitespace only sets bad_delim. |
387 | 0 | while ((pos > start) && (data[pos] != 0x20)) { |
388 | 0 | if (!bad_delim && htp_is_space(data[pos])) { |
389 | 0 | bad_delim++; |
390 | 0 | } |
391 | 0 | pos--; |
392 | 0 | } |
393 | | /* if we've seen some 'bad' delimiters and no SP at all, we retry with those */ |
394 | 0 | if (bad_delim && pos == start) { |
395 | | // special case: even though RFC's allow only SP (0x20), many |
396 | | // implementations allow other delimiters, like tab or other |
397 | | // characters that isspace() accepts. |
398 | 0 | pos = len - 1; |
399 | 0 | while ((pos > start) && (!htp_is_space(data[pos]))) pos--; |
400 | 0 | } else { |
401 | | // Discard bad_delim hits from the protocol part and re-check only the URI span [start, pos). |
402 | 0 | bad_delim = 0; |
403 | 0 | for (size_t i = start; i < pos; i++) { |
404 | 0 | if (data[i] != 0x20 && htp_is_space(data[i])) { |
405 | 0 | bad_delim = 1; |
406 | 0 | break; |
407 | 0 | } |
408 | 0 | } |
409 | 0 | } |
410 | 0 | if (bad_delim) { |
411 | | #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
412 | | // warn regardless if we've seen non-compliant chars |
413 | | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter"); |
414 | | #endif |
415 | 0 | } else if (pos == start) { |
416 | 0 | pos = len; /* no delimiter was found after start: the rest of the line is the URI */ |
417 | 0 | } |
418 | 118k | } else { |
419 | | // The URI ends at the first SP (0x20); other whitespace seen on the way only sets bad_delim. |
420 | 13.1M | while ((pos < len) && (data[pos] != 0x20)) { |
421 | 13.0M | if (!bad_delim && htp_is_space(data[pos])) { |
422 | 20.7k | bad_delim++; |
423 | 20.7k | } |
424 | 13.0M | pos++; |
425 | 13.0M | } |
426 | | /* if we've seen some 'bad' delimiters and no SP at all, we retry with those */ |
427 | 118k | if (bad_delim && pos == len) { |
428 | | // special case: even though RFC's allow only SP (0x20), many |
429 | | // implementations allow other delimiters, like tab or other |
430 | | // characters that isspace() accepts. |
431 | 10.0k | pos = start; |
432 | 909k | while ((pos < len) && (!htp_is_space(data[pos]))) pos++; |
433 | 10.0k | } |
434 | | #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
435 | | if (bad_delim) { |
436 | | // warn regardless if we've seen non-compliant chars |
437 | | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter"); |
438 | | } |
439 | | #endif |
440 | 118k | } |
441 | | |
442 | 118k | tx->request_uri = bstr_dup_mem(data + start, pos - start); |
443 | 118k | if (tx->request_uri == NULL) return HTP_ERROR; |
444 | | |
445 | | #ifdef HTP_DEBUG |
446 | | fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_uri), bstr_len(tx->request_uri)); |
447 | | #endif |
448 | | |
449 | | // Ignore whitespace after the URI. |
450 | 311k | while ((pos < len) && (htp_is_space(data[pos]))) pos++; |
451 | | |
452 | | // Is there protocol information available? |
453 | 118k | if (pos == len) { |
454 | | // No, this looks like a HTTP/0.9 request; request_protocol is not set here. |
455 | | |
456 | 44.4k | tx->is_protocol_0_9 = 1; |
457 | 44.4k | tx->request_protocol_number = HTP_PROTOCOL_0_9; |
458 | 44.4k | if (tx->request_method_number == HTP_M_UNKNOWN) |
459 | 37.4k | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and no protocol"); |
460 | | |
461 | 44.4k | return HTP_OK; |
462 | 44.4k | } |
463 | | |
464 | | // The protocol information continues until the end of the line (trailing whitespace included). |
465 | 74.1k | tx->request_protocol = bstr_dup_mem(data + pos, len - pos); |
466 | 74.1k | if (tx->request_protocol == NULL) return HTP_ERROR; |
467 | | |
468 | 74.1k | tx->request_protocol_number = htp_parse_protocol(tx->request_protocol); |
469 | 74.1k | if (tx->request_method_number == HTP_M_UNKNOWN && tx->request_protocol_number == HTP_PROTOCOL_INVALID) |
470 | 46.1k | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and invalid protocol"); |
471 | | |
472 | | #ifdef HTP_DEBUG |
473 | | fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_protocol), bstr_len(tx->request_protocol)); |
474 | | #endif |
475 | | |
476 | 74.1k | return HTP_OK; |
477 | 74.1k | } |
478 | | |