/src/libhtp/htp/htp_urlencoded.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*************************************************************************** |
2 | | * Copyright (c) 2009-2010 Open Information Security Foundation |
3 | | * Copyright (c) 2010-2013 Qualys, Inc. |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions are |
8 | | * met: |
9 | | * |
10 | | * - Redistributions of source code must retain the above copyright |
11 | | * notice, this list of conditions and the following disclaimer. |
12 | | |
13 | | * - Redistributions in binary form must reproduce the above copyright |
14 | | * notice, this list of conditions and the following disclaimer in the |
15 | | * documentation and/or other materials provided with the distribution. |
16 | | |
17 | | * - Neither the name of the Qualys, Inc. nor the names of its |
18 | | * contributors may be used to endorse or promote products derived from |
19 | | * this software without specific prior written permission. |
20 | | * |
21 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
22 | | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
23 | | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
24 | | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
25 | | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
26 | | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
27 | | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
28 | | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
29 | | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
30 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
31 | | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
32 | | ***************************************************************************/ |
33 | | |
34 | | /** |
35 | | * @file |
36 | | * @author Ivan Ristic <ivanr@webkreator.com> |
37 | | */ |
38 | | |
39 | | #include "htp_config_auto.h" |
40 | | |
41 | | #include "htp_private.h" |
42 | | |
43 | | /** |
44 | | * This method is invoked whenever a piece of data, belonging to a single field (name or value) |
45 | | * becomes available. It will either create a new parameter or store the transient information |
46 | | * until a parameter can be created. |
47 | | * |
48 | | * @param[in] urlenp |
49 | | * @param[in] data |
50 | | * @param[in] startpos |
51 | | * @param[in] endpos |
52 | | * @param[in] c Should contain -1 if the reason this function is called is because the end of |
53 | | * the current data chunk is reached. |
54 | | */ |
55 | 0 | static void htp_urlenp_add_field_piece(htp_urlenp_t *urlenp, const unsigned char *data, size_t startpos, size_t endpos, int last_char) { |
56 | | // Add field if we know it ended (last_char is something other than -1) |
57 | | // or if we know that there won't be any more input data (urlenp->_complete is true). |
58 | 0 | if ((last_char != -1) || (urlenp->_complete)) { |
59 | | // Prepare the field value, assembling from multiple pieces as necessary. |
60 | | |
61 | 0 | bstr *field = NULL; |
62 | | |
63 | | // Did we use the string builder for this field? |
64 | 0 | if (bstr_builder_size(urlenp->_bb) > 0) { |
65 | | // The current field consists of more than once piece, we have to use the string builder. |
66 | | |
67 | | // Add current piece to string builder. |
68 | 0 | if ((data != NULL) && (endpos - startpos > 0)) { |
69 | 0 | bstr_builder_append_mem(urlenp->_bb, data + startpos, endpos - startpos); |
70 | 0 | } |
71 | | |
72 | | // Generate the field and clear the string builder. |
73 | 0 | field = bstr_builder_to_str(urlenp->_bb); |
74 | 0 | if (field == NULL) return; |
75 | | |
76 | 0 | bstr_builder_clear(urlenp->_bb); |
77 | 0 | } else { |
78 | | // We only have the current piece to work with, so no need to involve the string builder. |
79 | 0 | if ((data != NULL) && (endpos - startpos > 0)) { |
80 | 0 | field = bstr_dup_mem(data + startpos, endpos - startpos); |
81 | 0 | if (field == NULL) return; |
82 | 0 | } |
83 | 0 | } |
84 | | |
85 | | // Process field as key or value, as appropriate. |
86 | | |
87 | 0 | if (urlenp->_state == HTP_URLENP_STATE_KEY) { |
88 | | // Key. |
89 | | |
90 | | // If there is no more work left to do, then we have a single key. Add it. |
91 | 0 | if ((urlenp->_complete)||(last_char == urlenp->argument_separator)) { |
92 | | |
93 | | // Handling empty pairs is tricky. We don't want to create a pair for |
94 | | // an entirely empty input, but in some cases it may be appropriate |
95 | | // (e.g., /index.php?&q=2). |
96 | 0 | if ((field != NULL)||(last_char == urlenp->argument_separator)) { |
97 | | // Add one pair, with an empty value and possibly empty key too. |
98 | |
|
99 | 0 | bstr *name = field; |
100 | 0 | if (name == NULL) { |
101 | 0 | name = bstr_dup_c(""); |
102 | 0 | if (name == NULL) return; |
103 | 0 | } |
104 | | |
105 | 0 | bstr *value = bstr_dup_c(""); |
106 | 0 | if (value == NULL) { |
107 | 0 | bstr_free(name); |
108 | 0 | return; |
109 | 0 | } |
110 | | |
111 | 0 | if (urlenp->decode_url_encoding) { |
112 | 0 | htp_tx_urldecode_params_inplace(urlenp->tx, name); |
113 | 0 | } |
114 | |
|
115 | 0 | htp_table_addn(urlenp->params, name, value); |
116 | |
|
117 | 0 | urlenp->_name = NULL; |
118 | |
|
119 | | #ifdef HTP_DEBUG |
120 | | fprint_raw_data(stderr, "NAME", bstr_ptr(name), bstr_len(name)); |
121 | | fprint_raw_data(stderr, "VALUE", bstr_ptr(value), bstr_len(value)); |
122 | | #endif |
123 | 0 | } |
124 | 0 | } else { |
125 | | // This key will possibly be followed by a value, so keep it for later. |
126 | 0 | urlenp->_name = field; |
127 | 0 | } |
128 | 0 | } else { |
129 | | // Value (with a key remembered from before). |
130 | |
|
131 | 0 | bstr *name = urlenp->_name; |
132 | 0 | urlenp->_name = NULL; |
133 | |
|
134 | 0 | if (name == NULL) { |
135 | 0 | name = bstr_dup_c(""); |
136 | 0 | if (name == NULL) { |
137 | 0 | bstr_free(field); |
138 | 0 | return; |
139 | 0 | } |
140 | 0 | } |
141 | | |
142 | 0 | bstr *value = field; |
143 | 0 | if (value == NULL) { |
144 | 0 | value = bstr_dup_c(""); |
145 | 0 | if (value == NULL) { |
146 | 0 | bstr_free(name); |
147 | 0 | return; |
148 | 0 | } |
149 | 0 | } |
150 | | |
151 | 0 | if (urlenp->decode_url_encoding) { |
152 | 0 | htp_tx_urldecode_params_inplace(urlenp->tx, name); |
153 | 0 | htp_tx_urldecode_params_inplace(urlenp->tx, value); |
154 | 0 | } |
155 | |
|
156 | 0 | htp_table_addn(urlenp->params, name, value); |
157 | |
|
158 | | #ifdef HTP_DEBUG |
159 | | fprint_raw_data(stderr, "NAME", bstr_ptr(name), bstr_len(name)); |
160 | | fprint_raw_data(stderr, "VALUE", bstr_ptr(value), bstr_len(value)); |
161 | | #endif |
162 | 0 | } |
163 | 0 | } else { |
164 | | // The field has not ended. We'll make a copy of of the available data for later. |
165 | 0 | if ((data != NULL) && (endpos - startpos > 0)) { |
166 | 0 | bstr_builder_append_mem(urlenp->_bb, data + startpos, endpos - startpos); |
167 | 0 | } |
168 | 0 | } |
169 | 0 | } |
170 | | |
171 | | /** |
172 | | * Creates a new URLENCODED parser. |
173 | | * |
174 | | * @return New parser, or NULL on memory allocation failure. |
175 | | */ |
176 | 0 | htp_urlenp_t *htp_urlenp_create(htp_tx_t *tx) { |
177 | 0 | htp_urlenp_t *urlenp = calloc(1, sizeof (htp_urlenp_t)); |
178 | 0 | if (urlenp == NULL) return NULL; |
179 | | |
180 | 0 | urlenp->tx = tx; |
181 | |
|
182 | 0 | urlenp->params = htp_table_create(HTP_URLENP_DEFAULT_PARAMS_SIZE); |
183 | 0 | if (urlenp->params == NULL) { |
184 | 0 | free(urlenp); |
185 | 0 | return NULL; |
186 | 0 | } |
187 | | |
188 | 0 | urlenp->_bb = bstr_builder_create(); |
189 | 0 | if (urlenp->_bb == NULL) { |
190 | 0 | htp_table_destroy(urlenp->params); |
191 | 0 | free(urlenp); |
192 | 0 | return NULL; |
193 | 0 | } |
194 | | |
195 | 0 | urlenp->argument_separator = '&'; |
196 | 0 | urlenp->decode_url_encoding = 1; |
197 | 0 | urlenp->_state = HTP_URLENP_STATE_KEY; |
198 | |
|
199 | 0 | return urlenp; |
200 | 0 | } |
201 | | |
202 | | /** |
203 | | * Destroys an existing URLENCODED parser. |
204 | | * |
205 | | * @param[in] urlenp |
206 | | */ |
207 | 186k | void htp_urlenp_destroy(htp_urlenp_t *urlenp) { |
208 | 186k | if (urlenp == NULL) return; |
209 | | |
210 | 0 | if (urlenp->_name != NULL) { |
211 | 0 | bstr_free(urlenp->_name); |
212 | 0 | } |
213 | |
|
214 | 0 | bstr_builder_destroy(urlenp->_bb); |
215 | |
|
216 | 0 | if (urlenp->params != NULL) { |
217 | | // Destroy parameters. |
218 | 0 | for (size_t i = 0, n = htp_table_size(urlenp->params); i < n; i++) { |
219 | 0 | bstr *b = htp_table_get_index(urlenp->params, i, NULL); |
220 | | // Parameter name will be freed by the table code. |
221 | 0 | bstr_free(b); |
222 | 0 | } |
223 | |
|
224 | 0 | htp_table_destroy(urlenp->params); |
225 | 0 | } |
226 | |
|
227 | 0 | free(urlenp); |
228 | 0 | } |
229 | | |
230 | | /** |
231 | | * Finalizes parsing, forcing the parser to convert any outstanding |
232 | | * data into parameters. This method should be invoked at the end |
233 | | * of a parsing operation that used htp_urlenp_parse_partial(). |
234 | | * |
235 | | * @param[in] urlenp |
236 | | * @return Success indication |
237 | | */ |
238 | 0 | htp_status_t htp_urlenp_finalize(htp_urlenp_t *urlenp) { |
239 | 0 | urlenp->_complete = 1; |
240 | 0 | return htp_urlenp_parse_partial(urlenp, NULL, 0); |
241 | 0 | } |
242 | | |
243 | | /** |
244 | | * Parses the provided data chunk under the assumption |
245 | | * that it contains all the data that will be parsed. When this |
246 | | * method is used for parsing the finalization method should not |
247 | | * be invoked. |
248 | | * |
249 | | * @param[in] urlenp |
250 | | * @param[in] data |
251 | | * @param[in] len |
252 | | * @return |
253 | | */ |
254 | 0 | htp_status_t htp_urlenp_parse_complete(htp_urlenp_t *urlenp, const void *data, size_t len) { |
255 | 0 | htp_urlenp_parse_partial(urlenp, data, len); |
256 | 0 | return htp_urlenp_finalize(urlenp); |
257 | 0 | } |
258 | | |
259 | | /** |
260 | | * Parses the provided data chunk, keeping state to allow streaming parsing, i.e., the |
261 | | * parsing where only partial information is available at any one time. The method |
262 | | * htp_urlenp_finalize() must be invoked at the end to finalize parsing. |
263 | | * |
264 | | * @param[in] urlenp |
265 | | * @param[in] _data |
266 | | * @param[in] len |
267 | | * @return |
268 | | */ |
269 | 0 | htp_status_t htp_urlenp_parse_partial(htp_urlenp_t *urlenp, const void *_data, size_t len) { |
270 | 0 | unsigned char *data = (unsigned char *) _data; |
271 | 0 | size_t startpos = 0; |
272 | 0 | size_t pos = 0; |
273 | 0 | int c; |
274 | |
|
275 | 0 | if (data == NULL) len = 0; |
276 | |
|
277 | 0 | do { |
278 | | // Get the next character, or use -1 to indicate end of input. |
279 | 0 | if (pos < len) c = data[pos]; |
280 | 0 | else c = -1; |
281 | |
|
282 | 0 | switch (urlenp->_state) { |
283 | | |
284 | 0 | case HTP_URLENP_STATE_KEY: |
285 | | // Look for =, argument separator, or end of input. |
286 | 0 | if ((c == '=') || (c == urlenp->argument_separator) || (c == -1)) { |
287 | | // Data from startpos to pos. |
288 | 0 | htp_urlenp_add_field_piece(urlenp, data, startpos, pos, c); |
289 | | |
290 | | // If it's not the end of input, then it must be the end of this field. |
291 | 0 | if (c != -1) { |
292 | | // Next state. |
293 | 0 | startpos = pos + 1; |
294 | |
|
295 | 0 | if (c == urlenp->argument_separator) { |
296 | 0 | urlenp->_state = HTP_URLENP_STATE_KEY; |
297 | 0 | } else { |
298 | 0 | urlenp->_state = HTP_URLENP_STATE_VALUE; |
299 | 0 | } |
300 | 0 | } |
301 | 0 | } |
302 | |
|
303 | 0 | pos++; |
304 | |
|
305 | 0 | break; |
306 | | |
307 | 0 | case HTP_URLENP_STATE_VALUE: |
308 | | // Look for argument separator or end of input. |
309 | 0 | if ((c == urlenp->argument_separator) || (c == -1)) { |
310 | | // Data from startpos to pos. |
311 | 0 | htp_urlenp_add_field_piece(urlenp, data, startpos, pos, c); |
312 | | |
313 | | // If it's not the end of input, then it must be the end of this field. |
314 | 0 | if (c != -1) { |
315 | | // Next state. |
316 | 0 | startpos = pos + 1; |
317 | 0 | urlenp->_state = HTP_URLENP_STATE_KEY; |
318 | 0 | } |
319 | 0 | } |
320 | |
|
321 | 0 | pos++; |
322 | |
|
323 | 0 | break; |
324 | | |
325 | 0 | default: |
326 | | // Invalid state. |
327 | 0 | return HTP_ERROR; |
328 | 0 | } |
329 | 0 | } while (c != -1); |
330 | | |
331 | 0 | return HTP_OK; |
332 | 0 | } |