/src/tarantool/src/lib/http_parser/http_parser.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file. |
3 | | * |
4 | | * Redistribution and use in source and binary forms, with or |
5 | | * without modification, are permitted provided that the following |
6 | | * conditions are met: |
7 | | * |
8 | | * 1. Redistributions of source code must retain the above |
9 | | * copyright notice, this list of conditions and the |
10 | | * following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above |
13 | | * copyright notice, this list of conditions and the following |
14 | | * disclaimer in the documentation and/or other materials |
15 | | * provided with the distribution. |
16 | | * |
17 | | * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND |
18 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
19 | | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
20 | | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL |
21 | | * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, |
22 | | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
24 | | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
25 | | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
26 | | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF |
28 | | * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
29 | | * SUCH DAMAGE. |
30 | | */ |
31 | | #include "http_parser.h" |
32 | | #include <string.h> |
33 | | |
/*
 * Line terminator bytes as unsigned char values. The expansions are
 * fully parenthesized so they stay a single operand in any expression
 * (e.g. as the operand of sizeof).
 */
#define LF ((unsigned char) '\n')
#define CR ((unsigned char) '\r')
#define CRLF "\r\n"
37 | | |
38 | | /** |
39 | | * The following HTTP parser functions were taken, with slight |
40 | | * adaptation, from the nginx HTTP parser module. |
41 | | */ |
42 | | |
43 | | void http_parser_create(struct http_parser *parser) |
44 | 429 | { |
45 | 429 | parser->hdr_value_start = NULL; |
46 | 429 | parser->hdr_value_end = NULL; |
47 | 429 | parser->http_major = -1; |
48 | 429 | parser->http_minor = -1; |
49 | 429 | parser->hdr_name = NULL; |
50 | 429 | parser->hdr_name_idx = 0; |
51 | 429 | } |
52 | | |
53 | | /** |
54 | | * Utility function used in headers parsing |
55 | | */ |
56 | | static int |
57 | | http_parse_status_line(struct http_parser *parser, const char **bufp, |
58 | | const char *end_buf) |
59 | 221 | { |
60 | 221 | char ch; |
61 | 221 | const char *p = *bufp; |
62 | 221 | enum { |
63 | 221 | sw_start = 0, |
64 | 221 | sw_H, |
65 | 221 | sw_HT, |
66 | 221 | sw_HTT, |
67 | 221 | sw_HTTP, |
68 | 221 | sw_first_major_digit, |
69 | 221 | sw_major_digit, |
70 | 221 | sw_first_minor_digit, |
71 | 221 | sw_minor_digit, |
72 | 221 | sw_status, |
73 | 221 | sw_space_after_status, |
74 | 221 | sw_status_text, |
75 | 221 | sw_almost_done |
76 | 221 | } state; |
77 | | |
78 | 221 | state = sw_start; |
79 | 221 | int status_count = 0; |
80 | 3.51k | for (;p < end_buf; p++) { |
81 | 3.43k | ch = *p; |
82 | 3.43k | switch (state) { |
83 | | /* "HTTP/" */ |
84 | 221 | case sw_start: |
85 | 221 | if (ch == 'H') |
86 | 221 | state = sw_H; |
87 | 0 | else |
88 | 0 | return HTTP_PARSE_INVALID; |
89 | 221 | break; |
90 | 221 | case sw_H: |
91 | 221 | if (ch == 'T') |
92 | 221 | state = sw_HT; |
93 | 0 | else |
94 | 0 | return HTTP_PARSE_INVALID; |
95 | 221 | break; |
96 | 221 | case sw_HT: |
97 | 221 | if (ch == 'T') |
98 | 221 | state = sw_HTT; |
99 | 0 | else |
100 | 0 | return HTTP_PARSE_INVALID; |
101 | 221 | break; |
102 | 221 | case sw_HTT: |
103 | 221 | if (ch == 'P') |
104 | 221 | state = sw_HTTP; |
105 | 0 | else |
106 | 0 | return HTTP_PARSE_INVALID; |
107 | 221 | break; |
108 | 221 | case sw_HTTP: |
109 | 221 | if (ch == '/') |
110 | 221 | state = sw_first_major_digit; |
111 | 0 | else |
112 | 0 | return HTTP_PARSE_INVALID; |
113 | 221 | break; |
114 | | /* The first digit of major HTTP version */ |
115 | 221 | case sw_first_major_digit: |
116 | 220 | if (ch < '1' || ch > '9') { |
117 | 22 | return HTTP_PARSE_INVALID; |
118 | 22 | } |
119 | 198 | parser->http_major = ch - '0'; |
120 | 198 | state = sw_major_digit; |
121 | 198 | break; |
122 | | /* The major HTTP version or dot */ |
123 | 208 | case sw_major_digit: |
124 | 208 | if (ch == '.') { |
125 | 76 | state = sw_first_minor_digit; |
126 | 76 | break; |
127 | 76 | } |
128 | 132 | if (ch == ' ') { |
129 | 67 | parser->http_minor = 0; |
130 | 67 | state = sw_status; |
131 | 67 | break; |
132 | 67 | } |
133 | 65 | if (ch < '0' || ch > '9') { |
134 | 30 | return HTTP_PARSE_INVALID; |
135 | 30 | } |
136 | 35 | if (parser->http_major > 99) { |
137 | 4 | return HTTP_PARSE_INVALID; |
138 | 4 | } |
139 | 31 | parser->http_major = parser->http_major * 10 |
140 | 31 | + (ch - '0'); |
141 | 31 | break; |
142 | | /* The first digit of minor HTTP version */ |
143 | 75 | case sw_first_minor_digit: |
144 | 75 | if (ch < '0' || ch > '9') { |
145 | 21 | return HTTP_PARSE_INVALID; |
146 | 21 | } |
147 | 54 | parser->http_minor = ch - '0'; |
148 | 54 | state = sw_minor_digit; |
149 | 54 | break; |
150 | | /* |
151 | | * The minor HTTP version or |
152 | | * the end of the request line |
153 | | */ |
154 | 258 | case sw_minor_digit: |
155 | 258 | if (ch == ' ') { |
156 | 1 | state = sw_status; |
157 | 1 | break; |
158 | 1 | } |
159 | 257 | if (ch < '0' || ch > '9') { |
160 | 27 | return HTTP_PARSE_INVALID; |
161 | 27 | } |
162 | 230 | if (parser->http_minor > 99) { |
163 | 3 | return HTTP_PARSE_INVALID; |
164 | 3 | } |
165 | 227 | parser->http_minor = parser->http_minor * 10 |
166 | 227 | + (ch - '0'); |
167 | 227 | break; |
168 | | /* HTTP status code */ |
169 | 448 | case sw_status: |
170 | 448 | if (ch == ' ') { |
171 | 343 | break; |
172 | 343 | } |
173 | 105 | if (ch < '0' || ch > '9') { |
174 | 26 | return HTTP_PARSE_INVALID; |
175 | 26 | } |
176 | 79 | if (++status_count == 3) { |
177 | 24 | state = sw_space_after_status; |
178 | 24 | } |
179 | 79 | break; |
180 | | /* Space or end of line */ |
181 | 23 | case sw_space_after_status: |
182 | 23 | switch (ch) { |
183 | 15 | case ' ': |
184 | 15 | state = sw_status_text; |
185 | 15 | break; |
186 | 3 | case '.': |
187 | | /* IIS may send 403.1, 403.2, etc */ |
188 | 3 | state = sw_status_text; |
189 | 3 | break; |
190 | 3 | case CR: |
191 | 3 | state = sw_almost_done; |
192 | 3 | break; |
193 | 1 | case LF: |
194 | 1 | goto done; |
195 | 1 | default: |
196 | 1 | return HTTP_PARSE_INVALID; |
197 | 23 | } |
198 | 21 | break; |
199 | | /* Any text until end of line */ |
200 | 1.09k | case sw_status_text: |
201 | 1.09k | switch (ch) { |
202 | 1 | case CR: |
203 | 1 | state = sw_almost_done; |
204 | 1 | break; |
205 | 1 | case LF: |
206 | 1 | goto done; |
207 | 1.09k | } |
208 | 1.09k | break; |
209 | | |
210 | | /* End of status line */ |
211 | 1.09k | case sw_almost_done: |
212 | 2 | switch (ch) { |
213 | 1 | case LF: |
214 | 1 | goto done; |
215 | 1 | default: |
216 | 1 | return HTTP_PARSE_INVALID; |
217 | 2 | } |
218 | 3.43k | } |
219 | 3.43k | } |
220 | 86 | done: |
221 | 86 | *bufp = p + 1; |
222 | 86 | return HTTP_PARSE_OK; |
223 | 221 | } |
224 | | |
225 | | int |
226 | | http_parse_header_line(struct http_parser *prsr, const char **bufp, |
227 | | const char *end_buf, int max_hname_len) |
228 | 429 | { |
229 | 429 | char c; |
230 | 429 | unsigned char ch; |
231 | 429 | const char *p = *bufp; |
232 | 429 | const char *header_name_start = p; |
233 | 429 | prsr->hdr_name_idx = 0; |
234 | | |
235 | 429 | enum { |
236 | 429 | sw_start = 0, |
237 | 429 | skip_status_line, |
238 | 429 | skipped_status_line_almost_done, |
239 | 429 | sw_name, |
240 | 429 | sw_space_before_value, |
241 | 429 | sw_value, |
242 | 429 | sw_space_after_value, |
243 | 429 | sw_almost_done, |
244 | 429 | sw_header_almost_done |
245 | 429 | } state = sw_start; |
246 | | |
247 | | /* |
248 | | * The last '\0' is not needed |
249 | | * because string is zero terminated |
250 | | */ |
251 | 429 | static char lowcase[] = |
252 | 429 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" |
253 | 429 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789" |
254 | 429 | "\0\0\0\0\0\0\0abcdefghijklmnopqrstuvwxyz\0\0\0\0_\0" |
255 | 429 | "abcdefghijklmnopqrstuvwxyz\0\0\0\0\0\0\0\0\0\0\0\0\0\0" |
256 | 429 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" |
257 | 429 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" |
258 | 429 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" |
259 | 429 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" |
260 | 429 | "\0\0\0\0\0\0\0\0\0\0"; |
261 | | |
262 | 5.24M | for (; p < end_buf; p++) { |
263 | 5.24M | ch = *p; |
264 | 5.24M | switch (state) { |
265 | | /* first char */ |
266 | 564 | case sw_start: |
267 | 564 | switch (ch) { |
268 | 13 | case CR: |
269 | 13 | prsr->hdr_value_end = p; |
270 | 13 | state = sw_header_almost_done; |
271 | 13 | break; |
272 | 2 | case LF: |
273 | 2 | prsr->hdr_value_end = p; |
274 | 2 | goto header_done; |
275 | 549 | default: |
276 | 549 | state = sw_name; |
277 | 549 | c = lowcase[ch]; |
278 | 549 | if (c != 0) { |
279 | 441 | prsr->hdr_name[0] = c; |
280 | 441 | prsr->hdr_name_idx = 1; |
281 | 441 | break; |
282 | 441 | } |
283 | 108 | if (ch == '\0') { |
284 | 1 | return HTTP_PARSE_INVALID; |
285 | 1 | } |
286 | 107 | break; |
287 | 564 | } |
288 | 561 | break; |
289 | 1.94k | case skip_status_line: |
290 | 1.94k | switch (ch) { |
291 | 2 | case LF: |
292 | 2 | goto skipped_status; |
293 | 3 | case CR: |
294 | 3 | state = skipped_status_line_almost_done; |
295 | 1.93k | default: |
296 | 1.93k | break; |
297 | 1.94k | } |
298 | 1.93k | break; |
299 | 1.93k | case skipped_status_line_almost_done: |
300 | 1 | switch (ch) { |
301 | 1 | case LF: |
302 | 1 | goto skipped_status; |
303 | 0 | case CR: |
304 | 0 | break; |
305 | 0 | default: |
306 | 0 | return HTTP_PARSE_INVALID; |
307 | 1 | } |
308 | 0 | break; |
309 | | /* http_header name */ |
310 | 5.24M | case sw_name: |
311 | 5.24M | c = lowcase[ch]; |
312 | 5.24M | if (c != 0) { |
313 | 4.16M | if (prsr->hdr_name_idx < max_hname_len) { |
314 | 4.16M | prsr->hdr_name[prsr->hdr_name_idx] = c; |
315 | 4.16M | prsr->hdr_name_idx++; |
316 | 4.16M | } |
317 | 4.16M | break; |
318 | 4.16M | } |
319 | 1.07M | if (ch == ':') { |
320 | 51 | state = sw_space_before_value; |
321 | 51 | break; |
322 | 51 | } |
323 | 1.07M | if (ch == CR) { |
324 | 13 | prsr->hdr_value_start = p; |
325 | 13 | prsr->hdr_value_end = p; |
326 | 13 | state = sw_almost_done; |
327 | 13 | break; |
328 | 13 | } |
329 | 1.07M | if (ch == LF) { |
330 | 1 | prsr->hdr_value_start = p; |
331 | 1 | prsr->hdr_value_end = p; |
332 | 1 | goto done; |
333 | 1 | } |
334 | | /* handle "HTTP/1.1 ..." lines */ |
335 | 1.07M | if (ch == '/' && p - header_name_start == 4 && |
336 | 1.07M | strncmp(header_name_start, "HTTP", 4) == 0) { |
337 | 221 | int rc = http_parse_status_line(prsr, |
338 | 221 | &header_name_start, |
339 | 221 | end_buf); |
340 | 221 | if (rc == HTTP_PARSE_INVALID) { |
341 | 135 | prsr->http_minor = -1; |
342 | 135 | prsr->http_major = -1; |
343 | 135 | state = sw_start; |
344 | 135 | } else { |
345 | | /* Skip it till end of line. */ |
346 | 86 | state = skip_status_line; |
347 | 86 | } |
348 | 221 | break; |
349 | 221 | } |
350 | 1.07M | if (ch == '\0') |
351 | 5 | return HTTP_PARSE_INVALID; |
352 | 1.07M | break; |
353 | | /* space* before http_header value */ |
354 | 1.07M | case sw_space_before_value: |
355 | 236 | switch (ch) { |
356 | 194 | case ' ': |
357 | 194 | break; |
358 | 1 | case CR: |
359 | 1 | prsr->hdr_value_start = p; |
360 | 1 | prsr->hdr_value_end = p; |
361 | 1 | state = sw_almost_done; |
362 | 1 | break; |
363 | 1 | case LF: |
364 | 1 | prsr->hdr_value_start = p; |
365 | 1 | prsr->hdr_value_end = p; |
366 | 1 | goto done; |
367 | 1 | case '\0': |
368 | 1 | return HTTP_PARSE_INVALID; |
369 | 39 | default: |
370 | 39 | prsr->hdr_value_start = p; |
371 | 39 | state = sw_value; |
372 | 39 | break; |
373 | 236 | } |
374 | 234 | break; |
375 | | |
376 | | /* http_header value */ |
377 | 596 | case sw_value: |
378 | 596 | switch (ch) { |
379 | 399 | case ' ': |
380 | 399 | prsr->hdr_value_end = p; |
381 | 399 | state = sw_space_after_value; |
382 | 399 | break; |
383 | 1 | case CR: |
384 | 1 | prsr->hdr_value_end = p; |
385 | 1 | state = sw_almost_done; |
386 | 1 | break; |
387 | 1 | case LF: |
388 | 1 | prsr->hdr_value_end = p; |
389 | 1 | goto done; |
390 | 1 | case '\0': |
391 | 1 | return HTTP_PARSE_INVALID; |
392 | 596 | } |
393 | 594 | break; |
394 | | /* space* before end of http_header line */ |
395 | 594 | case sw_space_after_value: |
396 | 577 | switch (ch) { |
397 | 194 | case ' ': |
398 | 194 | break; |
399 | 1 | case CR: |
400 | 1 | state = sw_almost_done; |
401 | 1 | break; |
402 | 1 | case LF: |
403 | 1 | goto done; |
404 | 1 | case '\0': |
405 | 1 | return HTTP_PARSE_INVALID; |
406 | 380 | default: |
407 | 380 | state = sw_value; |
408 | 380 | break; |
409 | 577 | } |
410 | 575 | break; |
411 | | /* end of http_header line */ |
412 | 575 | case sw_almost_done: |
413 | 323 | switch (ch) { |
414 | 1 | case LF: |
415 | 1 | goto done; |
416 | 320 | case CR: |
417 | 320 | break; |
418 | 2 | default: |
419 | 2 | return HTTP_PARSE_INVALID; |
420 | 323 | } |
421 | 320 | break; |
422 | | /* end of http_header */ |
423 | 320 | case sw_header_almost_done: |
424 | 12 | if (ch == LF) |
425 | 1 | goto header_done; |
426 | 11 | else |
427 | 11 | return HTTP_PARSE_INVALID; |
428 | 5.24M | } |
429 | 5.24M | } |
430 | | |
431 | 399 | skipped_status: |
432 | 399 | *bufp = p + 1; |
433 | 399 | return HTTP_PARSE_CONTINUE; |
434 | | |
435 | 5 | done: |
436 | 5 | *bufp = p + 1; |
437 | 5 | return HTTP_PARSE_OK; |
438 | | |
439 | 3 | header_done: |
440 | 3 | *bufp = p + 1; |
441 | 3 | return HTTP_PARSE_DONE; |
442 | 429 | } |