Coverage Report

Created: 2025-07-04 06:06

/src/http-parser/http_parser.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright Joyent, Inc. and other Node contributors.
2
 *
3
 * Permission is hereby granted, free of charge, to any person obtaining a copy
4
 * of this software and associated documentation files (the "Software"), to
5
 * deal in the Software without restriction, including without limitation the
6
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7
 * sell copies of the Software, and to permit persons to whom the Software is
8
 * furnished to do so, subject to the following conditions:
9
 *
10
 * The above copyright notice and this permission notice shall be included in
11
 * all copies or substantial portions of the Software.
12
 *
13
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
 * IN THE SOFTWARE.
20
 */
21
#include "http_parser.h"
22
#include <assert.h>
23
#include <stddef.h>
24
#include <ctype.h>
25
#include <string.h>
26
#include <limits.h>
27
28
static uint32_t max_header_size = HTTP_MAX_HEADER_SIZE;
29
30
#ifndef ULLONG_MAX
31
# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
32
#endif
33
34
#ifndef MIN
35
113k
# define MIN(a,b) ((a) < (b) ? (a) : (b))
36
#endif
37
38
#ifndef ARRAY_SIZE
39
0
# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
40
#endif
41
42
#ifndef BIT_AT
43
# define BIT_AT(a, i)                                                \
44
1.45M
  (!!((unsigned int) (a)[(unsigned int) (i) >> 3] &                  \
45
1.45M
   (1 << ((unsigned int) (i) & 7))))
46
#endif
47
48
#ifndef ELEM_AT
49
0
# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
50
#endif
51
52
1.06k
#define SET_ERRNO(e)                                                 \
53
1.06k
do {                                                                 \
54
1.06k
  parser->nread = nread;                                             \
55
1.06k
  parser->http_errno = (e);                                          \
56
1.06k
} while(0)
57
58
903k
#define CURRENT_STATE() p_state
59
782k
#define UPDATE_STATE(V) p_state = (enum state) (V);
60
4.32k
#define RETURN(V)                                                    \
61
4.32k
do {                                                                 \
62
4.32k
  parser->nread = nread;                                             \
63
4.32k
  parser->state = CURRENT_STATE();                                   \
64
4.32k
  return (V);                                                        \
65
4.32k
} while (0);
66
#define REEXECUTE()                                                  \
67
186k
  goto reexecute;                                                    \
68
69
70
#ifdef __GNUC__
71
265k
# define LIKELY(X) __builtin_expect(!!(X), 1)
72
1.12M
# define UNLIKELY(X) __builtin_expect(!!(X), 0)
73
#else
74
# define LIKELY(X) (X)
75
# define UNLIKELY(X) (X)
76
#endif
77
78
79
/* Run the notify callback FOR, returning ER if it fails */
80
34.3k
#define CALLBACK_NOTIFY_(FOR, ER)                                    \
81
34.3k
do {                                                                 \
82
34.3k
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
83
34.3k
                                                                     \
84
34.3k
  if (LIKELY(settings->on_##FOR)) {                                  \
85
0
    parser->state = CURRENT_STATE();                                 \
86
0
    if (UNLIKELY(0 != settings->on_##FOR(parser))) {                 \
87
0
      SET_ERRNO(HPE_CB_##FOR);                                       \
88
0
    }                                                                \
89
0
    UPDATE_STATE(parser->state);                                     \
90
0
                                                                     \
91
0
    /* We either errored above or got paused; get out */             \
92
0
    if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {             \
93
0
      return (ER);                                                   \
94
0
    }                                                                \
95
0
  }                                                                  \
96
34.3k
} while (0)
97
98
/* Run the notify callback FOR and consume the current byte */
99
33.9k
#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)
100
101
/* Run the notify callback FOR and don't consume the current byte */
102
343
#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)
103
104
/* Run data callback FOR with LEN bytes, returning ER if it fails */
105
245k
#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
106
245k
do {                                                                 \
107
245k
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
108
245k
                                                                     \
109
245k
  if (FOR##_mark) {                                                  \
110
231k
    if (LIKELY(settings->on_##FOR)) {                                \
111
0
      parser->state = CURRENT_STATE();                               \
112
0
      if (UNLIKELY(0 !=                                              \
113
0
                   settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
114
0
        SET_ERRNO(HPE_CB_##FOR);                                     \
115
0
      }                                                              \
116
0
      UPDATE_STATE(parser->state);                                   \
117
0
                                                                     \
118
0
      /* We either errored above or got paused; get out */           \
119
0
      if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {           \
120
0
        return (ER);                                                 \
121
0
      }                                                              \
122
0
    }                                                                \
123
231k
    FOR##_mark = NULL;                                               \
124
231k
  }                                                                  \
125
245k
} while (0)
126
127
/* Run the data callback FOR and consume the current byte */
128
#define CALLBACK_DATA(FOR)                                           \
129
106k
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
130
131
/* Run the data callback FOR and don't consume the current byte */
132
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
133
138k
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
134
135
/* Set the mark FOR; non-destructive if mark is already set */
136
231k
#define MARK(FOR)                                                    \
137
231k
do {                                                                 \
138
231k
  if (!FOR##_mark) {                                                 \
139
231k
    FOR##_mark = p;                                                  \
140
231k
  }                                                                  \
141
231k
} while (0)
142
143
/* Don't allow the total size of the HTTP headers (including the status
144
 * line) to exceed max_header_size.  This check is here to protect
145
 * embedders against denial-of-service attacks where the attacker feeds
146
 * us a never-ending header that the embedder keeps buffering.
147
 *
148
 * This check is arguably the responsibility of embedders but we're doing
149
 * it on the embedder's behalf because most won't bother and this way we
150
 * make the web a little safer.  max_header_size is still far bigger
151
 * than any reasonable request or response so this should never affect
152
 * day-to-day operation.
153
 */
154
809k
#define COUNT_HEADER_SIZE(V)                                         \
155
809k
do {                                                                 \
156
809k
  nread += (uint32_t)(V);                                            \
157
809k
  if (UNLIKELY(nread > max_header_size)) {                           \
158
130
    SET_ERRNO(HPE_HEADER_OVERFLOW);                                  \
159
130
    goto error;                                                      \
160
130
  }                                                                  \
161
809k
} while (0)
162
163
164
31.1k
#define PROXY_CONNECTION "proxy-connection"
165
226k
#define CONNECTION "connection"
166
74.7k
#define CONTENT_LENGTH "content-length"
167
301k
#define TRANSFER_ENCODING "transfer-encoding"
168
78.5k
#define UPGRADE "upgrade"
169
54.1k
#define CHUNKED "chunked"
170
106k
#define KEEP_ALIVE "keep-alive"
171
57.8k
#define CLOSE "close"
172
173
174
static const char *method_strings[] =
175
  {
176
#define XX(num, name, string) #string,
177
  HTTP_METHOD_MAP(XX)
178
#undef XX
179
  };
180
181
182
/* Tokens as defined by rfc 2616. Also lowercases them.
183
 *        token       = 1*<any CHAR except CTLs or separators>
184
 *     separators     = "(" | ")" | "<" | ">" | "@"
185
 *                    | "," | ";" | ":" | "\" | <">
186
 *                    | "/" | "[" | "]" | "?" | "="
187
 *                    | "{" | "}" | SP | HT
188
 */
189
static const char tokens[256] = {
190
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
191
        0,       0,       0,       0,       0,       0,       0,       0,
192
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
193
        0,       0,       0,       0,       0,       0,       0,       0,
194
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
195
        0,       0,       0,       0,       0,       0,       0,       0,
196
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
197
        0,       0,       0,       0,       0,       0,       0,       0,
198
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
199
       ' ',     '!',      0,      '#',     '$',     '%',     '&',    '\'',
200
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
201
        0,       0,      '*',     '+',      0,      '-',     '.',      0,
202
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
203
       '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
204
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
205
       '8',     '9',      0,       0,       0,       0,       0,       0,
206
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
207
        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
208
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
209
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
210
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
211
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
212
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
213
       'x',     'y',     'z',      0,       0,       0,      '^',     '_',
214
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
215
       '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
216
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
217
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
218
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
219
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
220
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
221
       'x',     'y',     'z',      0,      '|',      0,      '~',       0 };
222
223
224
/* Maps a byte value to the hexadecimal digit it represents (0-15), or to -1
 * when the byte is not a hexadecimal digit.
 *
 * All 256 entries are written out on purpose: entries left out of the
 * initializer are zero-initialized, which would make every byte >= 0x80 look
 * like the valid digit 0 to any caller that tests the result against -1.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  /* 0x80 - 0xFF: never hexadecimal digits */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  };
234
235
236
#if HTTP_PARSER_STRICT
237
# define T(v) 0
238
#else
239
# define T(v) v
240
#endif
241
242
243
static const uint8_t normal_url_char[32] = {
244
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
245
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
246
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
247
        0    | T(2)   |   0    |   0    | T(16)  |   0    |   0    |   0,
248
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
249
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
250
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
251
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
252
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
253
        0    |   2    |   4    |   0    |   16   |   32   |   64   |  128,
254
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
255
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
256
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
257
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
258
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
259
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0,
260
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
261
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
262
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
263
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
264
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
265
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
266
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
267
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
268
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
269
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
270
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
271
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
272
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
273
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
274
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
275
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0, };
276
277
#undef T
278
279
/* Parser states.  The numeric order matters: PARSING_HEADER() compares a
 * state against s_headers_done, so enumerators must not be reordered.
 */
enum state {
  s_dead = 1, /* must stay greater than zero */

  s_start_req_or_res,
  s_res_or_resp_H,
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_http_major,
  s_res_http_dot,
  s_res_http_minor,
  s_res_http_end,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status_start,
  s_res_status,
  s_res_line_almost_done,

  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_server_start,
  s_req_server,
  s_req_server_with_at,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_http_I,
  s_req_http_IC,
  s_req_http_major,
  s_req_http_dot,
  s_req_http_minor,
  s_req_http_end,
  s_req_line_almost_done,

  s_header_field_start,
  s_header_field,
  s_header_value_discard_ws,
  s_header_value_discard_ws_almost_done,
  s_header_value_discard_lws,
  s_header_value_start,
  s_header_value,
  s_header_value_lws,

  s_header_almost_done,

  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  s_headers_done,

  /* s_headers_done has to remain the last 'header' state: every state
   * listed after it is treated as a 'body' state by the header overflow
   * check (see the PARSING_HEADER() macro).
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof,

  s_message_done
};
360
361
362
642k
/* True while `state` is at or before s_headers_done.  The argument is
 * parenthesized so that an expression argument (e.g. a conditional) is
 * compared as a whole.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
363
364
365
/* Header-matching states; h_general is zero and the rest count up from it. */
enum header_states {
  h_general = 0,
  h_C,
  h_CO,
  h_CON,

  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  h_connection,
  h_content_length,
  h_content_length_num,
  h_content_length_ws,
  h_transfer_encoding,
  h_upgrade,

  h_matching_transfer_encoding_token_start,
  h_matching_transfer_encoding_chunked,
  h_matching_transfer_encoding_token,

  h_matching_connection_token_start,
  h_matching_connection_keep_alive,
  h_matching_connection_close,
  h_matching_connection_upgrade,
  h_matching_connection_token,

  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close,
  h_connection_upgrade
};
399
400
/* Host-parsing states; numbering starts at 1 with s_http_host_dead. */
enum http_host_state {
  s_http_host_dead = 1,
  s_http_userinfo_start,
  s_http_userinfo,
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_v6_zone_start,
  s_http_host_v6_zone,
  s_http_host_port_start,
  s_http_host_port
};
415
416
/* Macros for character classes; depends on strict-mode  */
417
60.9M
/* Character-class helpers.
 *
 * Every macro argument is parenthesized at each use and LOWER() expands to a
 * fully parenthesized expression, so passing an expression (for example a
 * conditional) gives the same result as passing its value.  Arguments may
 * still be evaluated more than once, so they must not have side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* Like a tokens[] lookup, except that a space always yields 0. */
#define STRICT_TOKEN(c)     (((c) == ' ') ? 0 : tokens[(unsigned char)(c)])

#if HTTP_PARSER_STRICT
#define TOKEN(c)            STRICT_TOKEN(c)
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
#define TOKEN(c)            tokens[(unsigned char)(c)]
/* Non-strict builds additionally accept any byte with the high bit set. */
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
/* Non-strict builds additionally accept '_' in host names. */
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif

/**
 * Verify that a char is a valid visible (printable) US-ASCII
 * character or %x80-FF.  CR, LF and horizontal tab (9) are accepted too.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))
451
452
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
453
454
455
#if HTTP_PARSER_STRICT
456
# define STRICT_CHECK(cond)                                          \
457
do {                                                                 \
458
  if (cond) {                                                        \
459
    SET_ERRNO(HPE_STRICT);                                           \
460
    goto error;                                                      \
461
  }                                                                  \
462
} while (0)
463
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
464
#else
465
# define STRICT_CHECK(cond)
466
# define NEW_MESSAGE() start_state
467
#endif
468
469
470
/* Map errno values to strings for human-readable output */
471
#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
472
static struct {
473
  const char *name;
474
  const char *description;
475
} http_strerror_tab[] = {
476
  HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
477
};
478
#undef HTTP_STRERROR_GEN
479
480
int http_message_needs_eof(const http_parser *parser);
481
482
/* Our URL parser.
483
 *
484
 * This is designed to be shared by http_parser_execute() for URL validation,
485
 * hence it has a state transition + byte-for-byte interface. In addition, it
486
 * is meant to be embedded in http_parser_parse_url(), which does the dirty
487
 * work of turning state transitions into URL components for its API.
488
 *
489
 * This function should only be invoked with non-space characters. It is
490
 * assumed that the caller cares about (and can detect) the transition between
491
 * URL and non-URL states by looking for these.
492
 */
493
static enum state
494
parse_url_char(enum state s, const char ch)
495
3.75M
{
496
3.75M
  if (ch == ' ' || ch == '\r' || ch == '\n') {
497
16
    return s_dead;
498
16
  }
499
500
#if HTTP_PARSER_STRICT
501
  if (ch == '\t' || ch == '\f') {
502
    return s_dead;
503
  }
504
#endif
505
506
3.75M
  switch (s) {
507
14.6k
    case s_req_spaces_before_url:
508
      /* Proxied requests are followed by scheme of an absolute URI (alpha).
509
       * All methods except CONNECT are followed by '/' or '*'.
510
       */
511
512
14.6k
      if (ch == '/' || ch == '*') {
513
12.8k
        return s_req_path;
514
12.8k
      }
515
516
1.80k
      if (IS_ALPHA(ch)) {
517
1.07k
        return s_req_schema;
518
1.07k
      }
519
520
727
      break;
521
522
264k
    case s_req_schema:
523
264k
      if (IS_ALPHA(ch)) {
524
263k
        return s;
525
263k
      }
526
527
1.02k
      if (ch == ':') {
528
947
        return s_req_schema_slash;
529
947
      }
530
531
77
      break;
532
533
940
    case s_req_schema_slash:
534
940
      if (ch == '/') {
535
889
        return s_req_schema_slash_slash;
536
889
      }
537
538
51
      break;
539
540
881
    case s_req_schema_slash_slash:
541
881
      if (ch == '/') {
542
859
        return s_req_server_start;
543
859
      }
544
545
22
      break;
546
547
733
    case s_req_server_with_at:
548
733
      if (ch == '@') {
549
2
        return s_dead;
550
2
      }
551
552
    /* fall through */
553
2.61k
    case s_req_server_start:
554
2.74M
    case s_req_server:
555
2.74M
      if (ch == '/') {
556
348
        return s_req_path;
557
348
      }
558
559
2.74M
      if (ch == '?') {
560
438
        return s_req_query_string_start;
561
438
      }
562
563
2.74M
      if (ch == '@') {
564
936
        return s_req_server_with_at;
565
936
      }
566
567
2.74M
      if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
568
2.74M
        return s_req_server;
569
2.74M
      }
570
571
144
      break;
572
573
458k
    case s_req_path:
574
458k
      if (IS_URL_CHAR(ch)) {
575
457k
        return s;
576
457k
      }
577
578
1.17k
      switch (ch) {
579
544
        case '?':
580
544
          return s_req_query_string_start;
581
582
600
        case '#':
583
600
          return s_req_fragment_start;
584
1.17k
      }
585
586
26
      break;
587
588
362
    case s_req_query_string_start:
589
174k
    case s_req_query_string:
590
174k
      if (IS_URL_CHAR(ch)) {
591
173k
        return s_req_query_string;
592
173k
      }
593
594
572
      switch (ch) {
595
397
        case '?':
596
          /* allow extra '?' in query string */
597
397
          return s_req_query_string;
598
599
131
        case '#':
600
131
          return s_req_fragment_start;
601
572
      }
602
603
44
      break;
604
605
1.40k
    case s_req_fragment_start:
606
1.40k
      if (IS_URL_CHAR(ch)) {
607
543
        return s_req_fragment;
608
543
      }
609
610
861
      switch (ch) {
611
8
        case '?':
612
8
          return s_req_fragment;
613
614
811
        case '#':
615
811
          return s;
616
861
      }
617
618
42
      break;
619
620
92.8k
    case s_req_fragment:
621
92.8k
      if (IS_URL_CHAR(ch)) {
622
92.3k
        return s;
623
92.3k
      }
624
625
550
      switch (ch) {
626
267
        case '?':
627
519
        case '#':
628
519
          return s;
629
550
      }
630
631
31
      break;
632
633
31
    default:
634
0
      break;
635
3.75M
  }
636
637
  /* We should never fall out of the switch above unless there's an error */
638
1.16k
  return s_dead;
639
3.75M
}
640
641
size_t http_parser_execute (http_parser *parser,
642
                            const http_parser_settings *settings,
643
                            const char *data,
644
                            size_t len)
645
4.32k
{
646
4.32k
  char c, ch;
647
4.32k
  int8_t unhex_val;
648
4.32k
  const char *p = data;
649
4.32k
  const char *header_field_mark = 0;
650
4.32k
  const char *header_value_mark = 0;
651
4.32k
  const char *url_mark = 0;
652
4.32k
  const char *body_mark = 0;
653
4.32k
  const char *status_mark = 0;
654
4.32k
  enum state p_state = (enum state) parser->state;
655
4.32k
  const unsigned int lenient = parser->lenient_http_headers;
656
4.32k
  const unsigned int allow_chunked_length = parser->allow_chunked_length;
657
658
4.32k
  uint32_t nread = parser->nread;
659
660
  /* We're in an error state. Don't bother doing anything. */
661
4.32k
  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
662
0
    return 0;
663
0
  }
664
665
4.32k
  if (len == 0) {
666
0
    switch (CURRENT_STATE()) {
667
0
      case s_body_identity_eof:
668
        /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
669
         * we got paused.
670
         */
671
0
        CALLBACK_NOTIFY_NOADVANCE(message_complete);
672
0
        return 0;
673
674
0
      case s_dead:
675
0
      case s_start_req_or_res:
676
0
      case s_start_res:
677
0
      case s_start_req:
678
0
        return 0;
679
680
0
      default:
681
0
        SET_ERRNO(HPE_INVALID_EOF_STATE);
682
0
        return 1;
683
0
    }
684
0
  }
685
686
687
4.32k
  if (CURRENT_STATE() == s_header_field)
688
0
    header_field_mark = data;
689
4.32k
  if (CURRENT_STATE() == s_header_value)
690
0
    header_value_mark = data;
691
4.32k
  switch (CURRENT_STATE()) {
692
0
  case s_req_path:
693
0
  case s_req_schema:
694
0
  case s_req_schema_slash:
695
0
  case s_req_schema_slash_slash:
696
0
  case s_req_server_start:
697
0
  case s_req_server:
698
0
  case s_req_server_with_at:
699
0
  case s_req_query_string_start:
700
0
  case s_req_query_string:
701
0
  case s_req_fragment_start:
702
0
  case s_req_fragment:
703
0
    url_mark = data;
704
0
    break;
705
0
  case s_res_status:
706
0
    status_mark = data;
707
0
    break;
708
4.32k
  default:
709
4.32k
    break;
710
4.32k
  }
711
712
645k
  for (p=data; p != data + len; p++) {
713
642k
    ch = *p;
714
715
642k
    if (PARSING_HEADER(CURRENT_STATE()))
716
637k
      COUNT_HEADER_SIZE(1);
717
718
886k
reexecute:
719
886k
    switch (CURRENT_STATE()) {
720
721
0
      case s_dead:
722
        /* this state is used after a 'Connection: close' message
723
         * the parser will error out if it reads another message
724
         */
725
0
        if (LIKELY(ch == CR || ch == LF))
726
0
          break;
727
728
0
        SET_ERRNO(HPE_CLOSED_CONNECTION);
729
0
        goto error;
730
731
4.69k
      case s_start_req_or_res:
732
4.69k
      {
733
4.69k
        if (ch == CR || ch == LF)
734
389
          break;
735
4.30k
        parser->flags = 0;
736
4.30k
        parser->uses_transfer_encoding = 0;
737
4.30k
        parser->content_length = ULLONG_MAX;
738
739
4.30k
        if (ch == 'H') {
740
447
          UPDATE_STATE(s_res_or_resp_H);
741
742
447
          CALLBACK_NOTIFY(message_begin);
743
3.85k
        } else {
744
3.85k
          parser->type = HTTP_REQUEST;
745
3.85k
          UPDATE_STATE(s_start_req);
746
3.85k
          REEXECUTE();
747
0
        }
748
749
447
        break;
750
4.30k
      }
751
752
447
      case s_res_or_resp_H:
753
446
        if (ch == 'T') {
754
428
          parser->type = HTTP_RESPONSE;
755
428
          UPDATE_STATE(s_res_HT);
756
428
        } else {
757
18
          if (UNLIKELY(ch != 'E')) {
758
16
            SET_ERRNO(HPE_INVALID_CONSTANT);
759
16
            goto error;
760
16
          }
761
762
2
          parser->type = HTTP_REQUEST;
763
2
          parser->method = HTTP_HEAD;
764
2
          parser->index = 2;
765
2
          UPDATE_STATE(s_req_method);
766
2
        }
767
430
        break;
768
769
3.00k
      case s_start_res:
770
3.00k
      {
771
3.00k
        if (ch == CR || ch == LF)
772
392
          break;
773
2.60k
        parser->flags = 0;
774
2.60k
        parser->uses_transfer_encoding = 0;
775
2.60k
        parser->content_length = ULLONG_MAX;
776
777
2.60k
        if (ch == 'H') {
778
2.58k
          UPDATE_STATE(s_res_H);
779
2.58k
        } else {
780
20
          SET_ERRNO(HPE_INVALID_CONSTANT);
781
20
          goto error;
782
20
        }
783
784
2.58k
        CALLBACK_NOTIFY(message_begin);
785
2.58k
        break;
786
2.58k
      }
787
788
2.58k
      case s_res_H:
789
2.58k
        STRICT_CHECK(ch != 'T');
790
2.58k
        UPDATE_STATE(s_res_HT);
791
2.58k
        break;
792
793
3.00k
      case s_res_HT:
794
3.00k
        STRICT_CHECK(ch != 'T');
795
3.00k
        UPDATE_STATE(s_res_HTT);
796
3.00k
        break;
797
798
2.99k
      case s_res_HTT:
799
2.99k
        STRICT_CHECK(ch != 'P');
800
2.99k
        UPDATE_STATE(s_res_HTTP);
801
2.99k
        break;
802
803
2.98k
      case s_res_HTTP:
804
2.98k
        STRICT_CHECK(ch != '/');
805
2.98k
        UPDATE_STATE(s_res_http_major);
806
2.98k
        break;
807
808
2.98k
      case s_res_http_major:
809
2.98k
        if (UNLIKELY(!IS_NUM(ch))) {
810
16
          SET_ERRNO(HPE_INVALID_VERSION);
811
16
          goto error;
812
16
        }
813
814
2.96k
        parser->http_major = ch - '0';
815
2.96k
        UPDATE_STATE(s_res_http_dot);
816
2.96k
        break;
817
818
2.95k
      case s_res_http_dot:
819
2.95k
      {
820
2.95k
        if (UNLIKELY(ch != '.')) {
821
7
          SET_ERRNO(HPE_INVALID_VERSION);
822
7
          goto error;
823
7
        }
824
825
2.94k
        UPDATE_STATE(s_res_http_minor);
826
2.94k
        break;
827
2.95k
      }
828
829
2.94k
      case s_res_http_minor:
830
2.94k
        if (UNLIKELY(!IS_NUM(ch))) {
831
10
          SET_ERRNO(HPE_INVALID_VERSION);
832
10
          goto error;
833
10
        }
834
835
2.93k
        parser->http_minor = ch - '0';
836
2.93k
        UPDATE_STATE(s_res_http_end);
837
2.93k
        break;
838
839
2.92k
      case s_res_http_end:
840
2.92k
      {
841
2.92k
        if (UNLIKELY(ch != ' ')) {
842
13
          SET_ERRNO(HPE_INVALID_VERSION);
843
13
          goto error;
844
13
        }
845
846
2.90k
        UPDATE_STATE(s_res_first_status_code);
847
2.90k
        break;
848
2.92k
      }
849
850
3.12k
      case s_res_first_status_code:
851
3.12k
      {
852
3.12k
        if (!IS_NUM(ch)) {
853
257
          if (ch == ' ') {
854
234
            break;
855
234
          }
856
857
23
          SET_ERRNO(HPE_INVALID_STATUS);
858
23
          goto error;
859
257
        }
860
2.87k
        parser->status_code = ch - '0';
861
2.87k
        UPDATE_STATE(s_res_status_code);
862
2.87k
        break;
863
3.12k
      }
864
865
8.42k
      case s_res_status_code:
866
8.42k
      {
867
8.42k
        if (!IS_NUM(ch)) {
868
2.84k
          switch (ch) {
869
442
            case ' ':
870
442
              UPDATE_STATE(s_res_status_start);
871
442
              break;
872
266
            case CR:
873
2.39k
            case LF:
874
2.39k
              UPDATE_STATE(s_res_status_start);
875
2.39k
              REEXECUTE();
876
0
              break;
877
9
            default:
878
9
              SET_ERRNO(HPE_INVALID_STATUS);
879
9
              goto error;
880
2.84k
          }
881
442
          break;
882
2.84k
        }
883
884
5.58k
        parser->status_code *= 10;
885
5.58k
        parser->status_code += ch - '0';
886
887
5.58k
        if (UNLIKELY(parser->status_code > 999)) {
888
3
          SET_ERRNO(HPE_INVALID_STATUS);
889
3
          goto error;
890
3
        }
891
892
5.58k
        break;
893
5.58k
      }
894
895
5.58k
      case s_res_status_start:
896
2.82k
      {
897
2.82k
        MARK(status);
898
2.82k
        UPDATE_STATE(s_res_status);
899
2.82k
        parser->index = 0;
900
901
2.82k
        if (ch == CR || ch == LF)
902
2.58k
          REEXECUTE();
903
904
245
        break;
905
2.82k
      }
906
907
4.88k
      case s_res_status:
908
4.88k
        if (ch == CR) {
909
269
          UPDATE_STATE(s_res_line_almost_done);
910
269
          CALLBACK_DATA(status);
911
269
          break;
912
269
        }
913
914
4.61k
        if (ch == LF) {
915
2.51k
          UPDATE_STATE(s_header_field_start);
916
2.51k
          CALLBACK_DATA(status);
917
2.51k
          break;
918
2.51k
        }
919
920
2.09k
        break;
921
922
2.09k
      case s_res_line_almost_done:
923
262
        STRICT_CHECK(ch != LF);
924
262
        UPDATE_STATE(s_header_field_start);
925
262
        break;
926
927
14.6k
      case s_start_req:
928
14.6k
      {
929
14.6k
        if (ch == CR || ch == LF)
930
397
          break;
931
14.3k
        parser->flags = 0;
932
14.3k
        parser->uses_transfer_encoding = 0;
933
14.3k
        parser->content_length = ULLONG_MAX;
934
935
14.3k
        if (UNLIKELY(!IS_ALPHA(ch))) {
936
34
          SET_ERRNO(HPE_INVALID_METHOD);
937
34
          goto error;
938
34
        }
939
940
14.2k
        parser->method = (enum http_method) 0;
941
14.2k
        parser->index = 1;
942
14.2k
        switch (ch) {
943
1.69k
          case 'A': parser->method = HTTP_ACL; break;
944
206
          case 'B': parser->method = HTTP_BIND; break;
945
501
          case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
946
205
          case 'D': parser->method = HTTP_DELETE; break;
947
4.62k
          case 'G': parser->method = HTTP_GET; break;
948
194
          case 'H': parser->method = HTTP_HEAD; break;
949
288
          case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
950
951
          case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
951
204
          case 'N': parser->method = HTTP_NOTIFY; break;
952
12
          case 'O': parser->method = HTTP_OPTIONS; break;
953
3.18k
          case 'P': parser->method = HTTP_POST;
954
            /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
955
3.18k
            break;
956
274
          case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
957
1.03k
          case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
958
210
          case 'T': parser->method = HTTP_TRACE; break;
959
690
          case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
960
7
          default:
961
7
            SET_ERRNO(HPE_INVALID_METHOD);
962
7
            goto error;
963
14.2k
        }
964
14.2k
        UPDATE_STATE(s_req_method);
965
966
14.2k
        CALLBACK_NOTIFY(message_begin);
967
968
14.2k
        break;
969
14.2k
      }
970
971
57.3k
      case s_req_method:
972
57.3k
      {
973
57.3k
        const char *matcher;
974
57.3k
        if (UNLIKELY(ch == '\0')) {
975
1
          SET_ERRNO(HPE_INVALID_METHOD);
976
1
          goto error;
977
1
        }
978
979
57.3k
        matcher = method_strings[parser->method];
980
57.3k
        if (ch == ' ' && matcher[parser->index] == '\0') {
981
13.6k
          UPDATE_STATE(s_req_spaces_before_url);
982
43.6k
        } else if (ch == matcher[parser->index]) {
983
36.2k
          ; /* nada */
984
36.2k
        } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
985
986
7.36k
          switch (parser->method << 16 | parser->index << 8 | ch) {
987
0
#define XX(meth, pos, ch, new_meth) \
988
7.07k
            case (HTTP_##meth << 16 | pos << 8 | ch): \
989
7.07k
              parser->method = HTTP_##new_meth; break;
990
991
2.64k
            XX(POST,      1, 'U', PUT)
992
199
            XX(POST,      1, 'A', PATCH)
993
291
            XX(POST,      1, 'R', PROPFIND)
994
277
            XX(PUT,       2, 'R', PURGE)
995
196
            XX(CONNECT,   1, 'H', CHECKOUT)
996
254
            XX(CONNECT,   2, 'P', COPY)
997
260
            XX(MKCOL,     1, 'O', MOVE)
998
199
            XX(MKCOL,     1, 'E', MERGE)
999
253
            XX(MKCOL,     1, '-', MSEARCH)
1000
3
            XX(MKCOL,     2, 'A', MKACTIVITY)
1001
195
            XX(MKCOL,     3, 'A', MKCALENDAR)
1002
223
            XX(SUBSCRIBE, 1, 'E', SEARCH)
1003
753
            XX(SUBSCRIBE, 1, 'O', SOURCE)
1004
249
            XX(REPORT,    2, 'B', REBIND)
1005
195
            XX(PROPFIND,  4, 'P', PROPPATCH)
1006
238
            XX(LOCK,      1, 'I', LINK)
1007
196
            XX(UNLOCK,    2, 'S', UNSUBSCRIBE)
1008
242
            XX(UNLOCK,    2, 'B', UNBIND)
1009
208
            XX(UNLOCK,    3, 'I', UNLINK)
1010
0
#undef XX
1011
289
            default:
1012
289
              SET_ERRNO(HPE_INVALID_METHOD);
1013
289
              goto error;
1014
7.36k
          }
1015
7.36k
        } else {
1016
33
          SET_ERRNO(HPE_INVALID_METHOD);
1017
33
          goto error;
1018
33
        }
1019
1020
57.0k
        ++parser->index;
1021
57.0k
        break;
1022
57.3k
      }
1023
1024
13.8k
      case s_req_spaces_before_url:
1025
13.8k
      {
1026
13.8k
        if (ch == ' ') break;
1027
1028
13.6k
        MARK(url);
1029
13.6k
        if (parser->method == HTTP_CONNECT) {
1030
20
          UPDATE_STATE(s_req_server_start);
1031
20
        }
1032
1033
13.6k
        UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1034
13.6k
        if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1035
40
          SET_ERRNO(HPE_INVALID_URL);
1036
40
          goto error;
1037
40
        }
1038
1039
13.6k
        break;
1040
13.6k
      }
1041
1042
13.6k
      case s_req_schema:
1043
1.97k
      case s_req_schema_slash:
1044
2.82k
      case s_req_schema_slash_slash:
1045
3.65k
      case s_req_server_start:
1046
3.65k
      {
1047
3.65k
        switch (ch) {
1048
          /* No whitespace allowed here */
1049
1
          case ' ':
1050
2
          case CR:
1051
3
          case LF:
1052
3
            SET_ERRNO(HPE_INVALID_URL);
1053
3
            goto error;
1054
3.65k
          default:
1055
3.65k
            UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1056
3.65k
            if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1057
77
              SET_ERRNO(HPE_INVALID_URL);
1058
77
              goto error;
1059
77
            }
1060
3.65k
        }
1061
1062
3.57k
        break;
1063
3.65k
      }
1064
1065
5.08k
      case s_req_server:
1066
5.40k
      case s_req_server_with_at:
1067
19.1k
      case s_req_path:
1068
19.9k
      case s_req_query_string_start:
1069
21.3k
      case s_req_query_string:
1070
22.2k
      case s_req_fragment_start:
1071
23.2k
      case s_req_fragment:
1072
23.2k
      {
1073
23.2k
        switch (ch) {
1074
1.50k
          case ' ':
1075
1.50k
            UPDATE_STATE(s_req_http_start);
1076
1.50k
            CALLBACK_DATA(url);
1077
1.50k
            break;
1078
1.50k
          case CR:
1079
11.5k
          case LF:
1080
11.5k
            parser->http_major = 0;
1081
11.5k
            parser->http_minor = 9;
1082
11.5k
            UPDATE_STATE((ch == CR) ?
1083
11.5k
              s_req_line_almost_done :
1084
11.5k
              s_header_field_start);
1085
11.5k
            CALLBACK_DATA(url);
1086
11.5k
            break;
1087
11.5k
          default:
1088
10.1k
            UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1089
10.1k
            if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1090
56
              SET_ERRNO(HPE_INVALID_URL);
1091
56
              goto error;
1092
56
            }
1093
23.2k
        }
1094
23.1k
        break;
1095
23.2k
      }
1096
1097
23.1k
      case s_req_http_start:
1098
1.68k
        switch (ch) {
1099
194
          case ' ':
1100
194
            break;
1101
933
          case 'H':
1102
933
            UPDATE_STATE(s_req_http_H);
1103
933
            break;
1104
559
          case 'I':
1105
559
            if (parser->method == HTTP_SOURCE) {
1106
550
              UPDATE_STATE(s_req_http_I);
1107
550
              break;
1108
550
            }
1109
            /* fall through */
1110
11
          default:
1111
11
            SET_ERRNO(HPE_INVALID_CONSTANT);
1112
11
            goto error;
1113
1.68k
        }
1114
1.67k
        break;
1115
1116
1.67k
      case s_req_http_H:
1117
929
        STRICT_CHECK(ch != 'T');
1118
929
        UPDATE_STATE(s_req_http_HT);
1119
929
        break;
1120
1121
924
      case s_req_http_HT:
1122
924
        STRICT_CHECK(ch != 'T');
1123
924
        UPDATE_STATE(s_req_http_HTT);
1124
924
        break;
1125
1126
918
      case s_req_http_HTT:
1127
918
        STRICT_CHECK(ch != 'P');
1128
918
        UPDATE_STATE(s_req_http_HTTP);
1129
918
        break;
1130
1131
544
      case s_req_http_I:
1132
544
        STRICT_CHECK(ch != 'C');
1133
544
        UPDATE_STATE(s_req_http_IC);
1134
544
        break;
1135
1136
540
      case s_req_http_IC:
1137
540
        STRICT_CHECK(ch != 'E');
1138
540
        UPDATE_STATE(s_req_http_HTTP);  /* Treat "ICE" as "HTTP". */
1139
540
        break;
1140
1141
1.44k
      case s_req_http_HTTP:
1142
1.44k
        STRICT_CHECK(ch != '/');
1143
1.44k
        UPDATE_STATE(s_req_http_major);
1144
1.44k
        break;
1145
1146
1.44k
      case s_req_http_major:
1147
1.44k
        if (UNLIKELY(!IS_NUM(ch))) {
1148
14
          SET_ERRNO(HPE_INVALID_VERSION);
1149
14
          goto error;
1150
14
        }
1151
1152
1.42k
        parser->http_major = ch - '0';
1153
1.42k
        UPDATE_STATE(s_req_http_dot);
1154
1.42k
        break;
1155
1156
1.41k
      case s_req_http_dot:
1157
1.41k
      {
1158
1.41k
        if (UNLIKELY(ch != '.')) {
1159
11
          SET_ERRNO(HPE_INVALID_VERSION);
1160
11
          goto error;
1161
11
        }
1162
1163
1.40k
        UPDATE_STATE(s_req_http_minor);
1164
1.40k
        break;
1165
1.41k
      }
1166
1167
1.40k
      case s_req_http_minor:
1168
1.40k
        if (UNLIKELY(!IS_NUM(ch))) {
1169
13
          SET_ERRNO(HPE_INVALID_VERSION);
1170
13
          goto error;
1171
13
        }
1172
1173
1.38k
        parser->http_minor = ch - '0';
1174
1.38k
        UPDATE_STATE(s_req_http_end);
1175
1.38k
        break;
1176
1177
1.37k
      case s_req_http_end:
1178
1.37k
      {
1179
1.37k
        if (ch == CR) {
1180
194
          UPDATE_STATE(s_req_line_almost_done);
1181
194
          break;
1182
194
        }
1183
1184
1.18k
        if (ch == LF) {
1185
1.17k
          UPDATE_STATE(s_header_field_start);
1186
1.17k
          break;
1187
1.17k
        }
1188
1189
8
        SET_ERRNO(HPE_INVALID_VERSION);
1190
8
        goto error;
1191
0
        break;
1192
1.18k
      }
1193
1194
      /* end of request line */
1195
451
      case s_req_line_almost_done:
1196
451
      {
1197
451
        if (UNLIKELY(ch != LF)) {
1198
11
          SET_ERRNO(HPE_LF_EXPECTED);
1199
11
          goto error;
1200
11
        }
1201
1202
440
        UPDATE_STATE(s_header_field_start);
1203
440
        break;
1204
451
      }
1205
1206
101k
      case s_header_field_start:
1207
101k
      {
1208
101k
        if (ch == CR) {
1209
195
          UPDATE_STATE(s_headers_almost_done);
1210
195
          break;
1211
195
        }
1212
1213
101k
        if (ch == LF) {
1214
          /* they might be just sending \n instead of \r\n so this would be
1215
           * the second \n to denote the end of headers*/
1216
14.2k
          UPDATE_STATE(s_headers_almost_done);
1217
14.2k
          REEXECUTE();
1218
0
        }
1219
1220
87.4k
        c = TOKEN(ch);
1221
1222
87.4k
        if (UNLIKELY(!c)) {
1223
42
          SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1224
42
          goto error;
1225
42
        }
1226
1227
87.3k
        MARK(header_field);
1228
1229
87.3k
        parser->index = 0;
1230
87.3k
        UPDATE_STATE(s_header_field);
1231
1232
87.3k
        switch (c) {
1233
26.6k
          case 'c':
1234
26.6k
            parser->header_state = h_C;
1235
26.6k
            break;
1236
1237
4.27k
          case 'p':
1238
4.27k
            parser->header_state = h_matching_proxy_connection;
1239
4.27k
            break;
1240
1241
8.03k
          case 't':
1242
8.03k
            parser->header_state = h_matching_transfer_encoding;
1243
8.03k
            break;
1244
1245
9.69k
          case 'u':
1246
9.69k
            parser->header_state = h_matching_upgrade;
1247
9.69k
            break;
1248
1249
38.7k
          default:
1250
38.7k
            parser->header_state = h_general;
1251
38.7k
            break;
1252
87.3k
        }
1253
87.3k
        break;
1254
87.3k
      }
1255
1256
87.3k
      case s_header_field:
1257
87.3k
      {
1258
87.3k
        const char* start = p;
1259
451k
        for (; p != data + len; p++) {
1260
451k
          ch = *p;
1261
451k
          c = TOKEN(ch);
1262
1263
451k
          if (!c)
1264
86.9k
            break;
1265
1266
364k
          switch (parser->header_state) {
1267
50.4k
            case h_general: {
1268
50.4k
              size_t left = data + len - p;
1269
50.4k
              const char* pe = p + MIN(left, max_header_size);
1270
9.64M
              while (p+1 < pe && TOKEN(p[1])) {
1271
9.59M
                p++;
1272
9.59M
              }
1273
50.4k
              break;
1274
0
            }
1275
1276
26.1k
            case h_C:
1277
26.1k
              parser->index++;
1278
26.1k
              parser->header_state = (c == 'o' ? h_CO : h_general);
1279
26.1k
              break;
1280
1281
24.2k
            case h_CO:
1282
24.2k
              parser->index++;
1283
24.2k
              parser->header_state = (c == 'n' ? h_CON : h_general);
1284
24.2k
              break;
1285
1286
23.0k
            case h_CON:
1287
23.0k
              parser->index++;
1288
23.0k
              switch (c) {
1289
16.0k
                case 'n':
1290
16.0k
                  parser->header_state = h_matching_connection;
1291
16.0k
                  break;
1292
6.30k
                case 't':
1293
6.30k
                  parser->header_state = h_matching_content_length;
1294
6.30k
                  break;
1295
688
                default:
1296
688
                  parser->header_state = h_general;
1297
688
                  break;
1298
23.0k
              }
1299
23.0k
              break;
1300
1301
            /* connection */
1302
1303
76.6k
            case h_matching_connection:
1304
76.6k
              parser->index++;
1305
76.6k
              if (parser->index > sizeof(CONNECTION)-1
1306
76.6k
                  || c != CONNECTION[parser->index]) {
1307
3.76k
                parser->header_state = h_general;
1308
72.8k
              } else if (parser->index == sizeof(CONNECTION)-2) {
1309
11.8k
                parser->header_state = h_connection;
1310
11.8k
              }
1311
76.6k
              break;
1312
1313
            /* proxy-connection */
1314
1315
11.5k
            case h_matching_proxy_connection:
1316
11.5k
              parser->index++;
1317
11.5k
              if (parser->index > sizeof(PROXY_CONNECTION)-1
1318
11.5k
                  || c != PROXY_CONNECTION[parser->index]) {
1319
3.66k
                parser->header_state = h_general;
1320
7.93k
              } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1321
313
                parser->header_state = h_connection;
1322
313
              }
1323
11.5k
              break;
1324
1325
            /* content-length */
1326
1327
25.5k
            case h_matching_content_length:
1328
25.5k
              parser->index++;
1329
25.5k
              if (parser->index > sizeof(CONTENT_LENGTH)-1
1330
25.5k
                  || c != CONTENT_LENGTH[parser->index]) {
1331
2.04k
                parser->header_state = h_general;
1332
23.5k
              } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1333
1.94k
                parser->header_state = h_content_length;
1334
1.94k
              }
1335
25.5k
              break;
1336
1337
            /* transfer-encoding */
1338
1339
101k
            case h_matching_transfer_encoding:
1340
101k
              parser->index++;
1341
101k
              if (parser->index > sizeof(TRANSFER_ENCODING)-1
1342
101k
                  || c != TRANSFER_ENCODING[parser->index]) {
1343
1.93k
                parser->header_state = h_general;
1344
99.3k
              } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1345
5.62k
                parser->header_state = h_transfer_encoding;
1346
5.62k
                parser->uses_transfer_encoding = 1;
1347
5.62k
              }
1348
101k
              break;
1349
1350
            /* upgrade */
1351
1352
15.2k
            case h_matching_upgrade:
1353
15.2k
              parser->index++;
1354
15.2k
              if (parser->index > sizeof(UPGRADE)-1
1355
15.2k
                  || c != UPGRADE[parser->index]) {
1356
5.68k
                parser->header_state = h_general;
1357
9.56k
              } else if (parser->index == sizeof(UPGRADE)-2) {
1358
1.40k
                parser->header_state = h_upgrade;
1359
1.40k
              }
1360
15.2k
              break;
1361
1362
8.83k
            case h_connection:
1363
9.51k
            case h_content_length:
1364
9.75k
            case h_transfer_encoding:
1365
10.3k
            case h_upgrade:
1366
10.3k
              if (ch != ' ') parser->header_state = h_general;
1367
10.3k
              break;
1368
1369
0
            default:
1370
0
              assert(0 && "Unknown header_state");
1371
0
              break;
1372
364k
          }
1373
364k
        }
1374
1375
87.3k
        if (p == data + len) {
1376
360
          --p;
1377
360
          COUNT_HEADER_SIZE(p - start);
1378
329
          break;
1379
360
        }
1380
1381
86.9k
        COUNT_HEADER_SIZE(p - start);
1382
1383
86.9k
        if (ch == ':') {
1384
86.8k
          UPDATE_STATE(s_header_value_discard_ws);
1385
86.8k
          CALLBACK_DATA(header_field);
1386
86.8k
          break;
1387
86.8k
        }
1388
1389
40
        SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1390
40
        goto error;
1391
86.9k
      }
1392
1393
135k
      case s_header_value_discard_ws:
1394
135k
        if (ch == ' ' || ch == '\t') break;
1395
1396
91.4k
        if (ch == CR) {
1397
1.23k
          UPDATE_STATE(s_header_value_discard_ws_almost_done);
1398
1.23k
          break;
1399
1.23k
        }
1400
1401
90.1k
        if (ch == LF) {
1402
44.1k
          UPDATE_STATE(s_header_value_discard_lws);
1403
44.1k
          break;
1404
44.1k
        }
1405
1406
        /* fall through */
1407
1408
85.0k
      case s_header_value_start:
1409
85.0k
      {
1410
85.0k
        MARK(header_value);
1411
1412
85.0k
        UPDATE_STATE(s_header_value);
1413
85.0k
        parser->index = 0;
1414
1415
85.0k
        c = LOWER(ch);
1416
1417
85.0k
        switch (parser->header_state) {
1418
798
          case h_upgrade:
1419
798
            parser->flags |= F_UPGRADE;
1420
798
            parser->header_state = h_general;
1421
798
            break;
1422
1423
5.51k
          case h_transfer_encoding:
1424
            /* looking for 'Transfer-Encoding: chunked' */
1425
5.51k
            if ('c' == c) {
1426
4.18k
              parser->header_state = h_matching_transfer_encoding_chunked;
1427
4.18k
            } else {
1428
1.32k
              parser->header_state = h_matching_transfer_encoding_token;
1429
1.32k
            }
1430
5.51k
            break;
1431
1432
          /* Multi-value `Transfer-Encoding` header */
1433
262
          case h_matching_transfer_encoding_token_start:
1434
262
            break;
1435
1436
1.43k
          case h_content_length:
1437
1.43k
            if (UNLIKELY(!IS_NUM(ch))) {
1438
14
              SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1439
14
              goto error;
1440
14
            }
1441
1442
1.41k
            if (parser->flags & F_CONTENTLENGTH) {
1443
1
              SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1444
1
              goto error;
1445
1
            }
1446
1447
1.41k
            parser->flags |= F_CONTENTLENGTH;
1448
1.41k
            parser->content_length = ch - '0';
1449
1.41k
            parser->header_state = h_content_length_num;
1450
1.41k
            break;
1451
1452
          /* when obsolete line folding is encountered for content length
1453
           * continue to the s_header_value state */
1454
387
          case h_content_length_ws:
1455
387
            break;
1456
1457
11.2k
          case h_connection:
1458
            /* looking for 'Connection: keep-alive' */
1459
11.2k
            if (c == 'k') {
1460
1.26k
              parser->header_state = h_matching_connection_keep_alive;
1461
            /* looking for 'Connection: close' */
1462
9.94k
            } else if (c == 'c') {
1463
2.62k
              parser->header_state = h_matching_connection_close;
1464
7.32k
            } else if (c == 'u') {
1465
1.96k
              parser->header_state = h_matching_connection_upgrade;
1466
5.35k
            } else {
1467
5.35k
              parser->header_state = h_matching_connection_token;
1468
5.35k
            }
1469
11.2k
            break;
1470
1471
          /* Multi-value `Connection` header */
1472
549
          case h_matching_connection_token_start:
1473
549
            break;
1474
1475
64.9k
          default:
1476
64.9k
            parser->header_state = h_general;
1477
64.9k
            break;
1478
85.0k
        }
1479
85.0k
        break;
1480
85.0k
      }
1481
1482
85.0k
      case s_header_value:
1483
84.8k
      {
1484
84.8k
        const char* start = p;
1485
84.8k
        enum header_states h_state = (enum header_states) parser->header_state;
1486
1.80M
        for (; p != data + len; p++) {
1487
1.80M
          ch = *p;
1488
1.80M
          if (ch == CR) {
1489
2.73k
            UPDATE_STATE(s_header_almost_done);
1490
2.73k
            parser->header_state = h_state;
1491
2.73k
            CALLBACK_DATA(header_value);
1492
2.73k
            break;
1493
2.73k
          }
1494
1495
1.80M
          if (ch == LF) {
1496
81.4k
            UPDATE_STATE(s_header_almost_done);
1497
81.4k
            COUNT_HEADER_SIZE(p - start);
1498
81.4k
            parser->header_state = h_state;
1499
81.4k
            CALLBACK_DATA_NOADVANCE(header_value);
1500
81.4k
            REEXECUTE();
1501
0
          }
1502
1503
1.72M
          if (!lenient && !IS_HEADER_CHAR(ch)) {
1504
16
            SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1505
16
            goto error;
1506
16
          }
1507
1508
1.72M
          c = LOWER(ch);
1509
1510
1.72M
          switch (h_state) {
1511
60.6k
            case h_general:
1512
60.6k
              {
1513
60.6k
                size_t left = data + len - p;
1514
60.6k
                const char* pe = p + MIN(left, max_header_size);
1515
1516
13.8M
                for (; p != pe; p++) {
1517
13.8M
                  ch = *p;
1518
13.8M
                  if (ch == CR || ch == LF) {
1519
60.4k
                    --p;
1520
60.4k
                    break;
1521
60.4k
                  }
1522
13.8M
                  if (!lenient && !IS_HEADER_CHAR(ch)) {
1523
27
                    SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1524
27
                    goto error;
1525
27
                  }
1526
13.8M
                }
1527
60.6k
                if (p == data + len)
1528
92
                  --p;
1529
60.6k
                break;
1530
60.6k
              }
1531
1532
0
            case h_connection:
1533
0
            case h_transfer_encoding:
1534
0
              assert(0 && "Shouldn't get here.");
1535
0
              break;
1536
1537
0
            case h_content_length:
1538
0
              if (ch == ' ') break;
1539
0
              h_state = h_content_length_num;
1540
              /* fall through */
1541
1542
6.23k
            case h_content_length_num:
1543
6.23k
            {
1544
6.23k
              uint64_t t;
1545
1546
6.23k
              if (ch == ' ') {
1547
213
                h_state = h_content_length_ws;
1548
213
                break;
1549
213
              }
1550
1551
6.01k
              if (UNLIKELY(!IS_NUM(ch))) {
1552
19
                SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1553
19
                parser->header_state = h_state;
1554
19
                goto error;
1555
19
              }
1556
1557
6.00k
              t = parser->content_length;
1558
6.00k
              t *= 10;
1559
6.00k
              t += ch - '0';
1560
1561
              /* Overflow? Test against a conservative limit for simplicity. */
1562
6.00k
              if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1563
6
                SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1564
6
                parser->header_state = h_state;
1565
6
                goto error;
1566
6
              }
1567
1568
5.99k
              parser->content_length = t;
1569
5.99k
              break;
1570
6.00k
            }
1571
1572
234
            case h_content_length_ws:
1573
234
              if (ch == ' ') break;
1574
8
              SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1575
8
              parser->header_state = h_state;
1576
8
              goto error;
1577
1578
            /* Transfer-Encoding: chunked */
1579
7.41k
            case h_matching_transfer_encoding_token_start:
1580
              /* looking for 'Transfer-Encoding: chunked' */
1581
7.41k
              if ('c' == c) {
1582
2.70k
                h_state = h_matching_transfer_encoding_chunked;
1583
4.71k
              } else if (STRICT_TOKEN(c)) {
1584
                /* TODO(indutny): similar code below does this, but why?
1585
                 * At the very least it seems to be inconsistent given that
1586
                 * h_matching_transfer_encoding_token does not check for
1587
                 * `STRICT_TOKEN`
1588
                 */
1589
2.39k
                h_state = h_matching_transfer_encoding_token;
1590
2.39k
              } else if (c == ' ' || c == '\t') {
1591
                /* Skip lws */
1592
1.55k
              } else {
1593
763
                h_state = h_general;
1594
763
              }
1595
7.41k
              break;
1596
1597
19.5k
            case h_matching_transfer_encoding_chunked:
1598
19.5k
              parser->index++;
1599
19.5k
              if (parser->index > sizeof(CHUNKED)-1
1600
19.5k
                  || c != CHUNKED[parser->index]) {
1601
4.44k
                h_state = h_matching_transfer_encoding_token;
1602
15.0k
              } else if (parser->index == sizeof(CHUNKED)-2) {
1603
2.23k
                h_state = h_transfer_encoding_chunked;
1604
2.23k
              }
1605
19.5k
              break;
1606
1607
399k
            case h_matching_transfer_encoding_token:
1608
399k
              if (ch == ',') {
1609
5.88k
                h_state = h_matching_transfer_encoding_token_start;
1610
5.88k
                parser->index = 0;
1611
5.88k
              }
1612
399k
              break;
1613
1614
71.6k
            case h_matching_connection_token_start:
1615
              /* looking for 'Connection: keep-alive' */
1616
71.6k
              if (c == 'k') {
1617
6.94k
                h_state = h_matching_connection_keep_alive;
1618
              /* looking for 'Connection: close' */
1619
64.6k
              } else if (c == 'c') {
1620
5.27k
                h_state = h_matching_connection_close;
1621
59.3k
              } else if (c == 'u') {
1622
1.21k
                h_state = h_matching_connection_upgrade;
1623
58.1k
              } else if (STRICT_TOKEN(c)) {
1624
9.30k
                h_state = h_matching_connection_token;
1625
48.8k
              } else if (c == ' ' || c == '\t') {
1626
                /* Skip lws */
1627
47.7k
              } else {
1628
1.11k
                h_state = h_general;
1629
1.11k
              }
1630
71.6k
              break;
1631
1632
            /* looking for 'Connection: keep-alive' */
1633
36.8k
            case h_matching_connection_keep_alive:
1634
36.8k
              parser->index++;
1635
36.8k
              if (parser->index > sizeof(KEEP_ALIVE)-1
1636
36.8k
                  || c != KEEP_ALIVE[parser->index]) {
1637
4.41k
                h_state = h_matching_connection_token;
1638
32.4k
              } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1639
3.22k
                h_state = h_connection_keep_alive;
1640
3.22k
              }
1641
36.8k
              break;
1642
1643
            /* looking for 'Connection: close' */
1644
20.6k
            case h_matching_connection_close:
1645
20.6k
              parser->index++;
1646
20.6k
              if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1647
3.99k
                h_state = h_matching_connection_token;
1648
16.6k
              } else if (parser->index == sizeof(CLOSE)-2) {
1649
3.65k
                h_state = h_connection_close;
1650
3.65k
              }
1651
20.6k
              break;
1652
1653
            /* looking for 'Connection: upgrade' */
1654
13.2k
            case h_matching_connection_upgrade:
1655
13.2k
              parser->index++;
1656
13.2k
              if (parser->index > sizeof(UPGRADE) - 1 ||
1657
13.2k
                  c != UPGRADE[parser->index]) {
1658
1.13k
                h_state = h_matching_connection_token;
1659
12.0k
              } else if (parser->index == sizeof(UPGRADE)-2) {
1660
1.83k
                h_state = h_connection_upgrade;
1661
1.83k
              }
1662
13.2k
              break;
1663
1664
1.06M
            case h_matching_connection_token:
1665
1.06M
              if (ch == ',') {
1666
19.8k
                h_state = h_matching_connection_token_start;
1667
19.8k
                parser->index = 0;
1668
19.8k
              }
1669
1.06M
              break;
1670
1671
7.04k
            case h_transfer_encoding_chunked:
1672
7.04k
              if (ch != ' ') h_state = h_matching_transfer_encoding_token;
1673
7.04k
              break;
1674
1675
3.29k
            case h_connection_keep_alive:
1676
13.7k
            case h_connection_close:
1677
14.6k
            case h_connection_upgrade:
1678
14.6k
              if (ch == ',') {
1679
4.30k
                if (h_state == h_connection_keep_alive) {
1680
2.31k
                  parser->flags |= F_CONNECTION_KEEP_ALIVE;
1681
2.31k
                } else if (h_state == h_connection_close) {
1682
1.55k
                  parser->flags |= F_CONNECTION_CLOSE;
1683
1.55k
                } else if (h_state == h_connection_upgrade) {
1684
446
                  parser->flags |= F_CONNECTION_UPGRADE;
1685
446
                }
1686
4.30k
                h_state = h_matching_connection_token_start;
1687
4.30k
                parser->index = 0;
1688
10.3k
              } else if (ch != ' ') {
1689
1.82k
                h_state = h_matching_connection_token;
1690
1.82k
              }
1691
14.6k
              break;
1692
1693
0
            default:
1694
0
              UPDATE_STATE(s_header_value);
1695
0
              h_state = h_general;
1696
0
              break;
1697
1.72M
          }
1698
1.72M
        }
1699
3.34k
        parser->header_state = h_state;
1700
1701
3.34k
        if (p == data + len)
1702
613
          --p;
1703
1704
3.34k
        COUNT_HEADER_SIZE(p - start);
1705
3.30k
        break;
1706
3.34k
      }
1707
1708
84.1k
      case s_header_almost_done:
1709
84.1k
      {
1710
84.1k
        if (UNLIKELY(ch != LF)) {
1711
8
          SET_ERRNO(HPE_LF_EXPECTED);
1712
8
          goto error;
1713
8
        }
1714
1715
84.1k
        UPDATE_STATE(s_header_value_lws);
1716
84.1k
        break;
1717
84.1k
      }
1718
1719
84.1k
      case s_header_value_lws:
1720
84.1k
      {
1721
84.1k
        if (ch == ' ' || ch == '\t') {
1722
38.9k
          if (parser->header_state == h_content_length_num) {
1723
              /* treat obsolete line folding as space */
1724
201
              parser->header_state = h_content_length_ws;
1725
201
          }
1726
38.9k
          UPDATE_STATE(s_header_value_start);
1727
38.9k
          REEXECUTE();
1728
0
        }
1729
1730
        /* finished the header */
1731
45.1k
        switch (parser->header_state) {
1732
265
          case h_connection_keep_alive:
1733
265
            parser->flags |= F_CONNECTION_KEEP_ALIVE;
1734
265
            break;
1735
1.12k
          case h_connection_close:
1736
1.12k
            parser->flags |= F_CONNECTION_CLOSE;
1737
1.12k
            break;
1738
1.06k
          case h_transfer_encoding_chunked:
1739
1.06k
            parser->flags |= F_CHUNKED;
1740
1.06k
            break;
1741
1.17k
          case h_connection_upgrade:
1742
1.17k
            parser->flags |= F_CONNECTION_UPGRADE;
1743
1.17k
            break;
1744
41.5k
          default:
1745
41.5k
            break;
1746
45.1k
        }
1747
1748
45.1k
        UPDATE_STATE(s_header_field_start);
1749
45.1k
        REEXECUTE();
1750
0
      }
1751
1752
1.23k
      case s_header_value_discard_ws_almost_done:
1753
1.23k
      {
1754
1.23k
        STRICT_CHECK(ch != LF);
1755
1.23k
        UPDATE_STATE(s_header_value_discard_lws);
1756
1.23k
        break;
1757
0
      }
1758
1759
45.3k
      case s_header_value_discard_lws:
1760
45.3k
      {
1761
45.3k
        if (ch == ' ' || ch == '\t') {
1762
4.60k
          UPDATE_STATE(s_header_value_discard_ws);
1763
4.60k
          break;
1764
40.7k
        } else {
1765
40.7k
          switch (parser->header_state) {
1766
0
            case h_connection_keep_alive:
1767
0
              parser->flags |= F_CONNECTION_KEEP_ALIVE;
1768
0
              break;
1769
0
            case h_connection_close:
1770
0
              parser->flags |= F_CONNECTION_CLOSE;
1771
0
              break;
1772
0
            case h_connection_upgrade:
1773
0
              parser->flags |= F_CONNECTION_UPGRADE;
1774
0
              break;
1775
0
            case h_transfer_encoding_chunked:
1776
0
              parser->flags |= F_CHUNKED;
1777
0
              break;
1778
1
            case h_content_length:
1779
              /* do not allow empty content length */
1780
1
              SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1781
1
              goto error;
1782
0
              break;
1783
40.7k
            default:
1784
40.7k
              break;
1785
40.7k
          }
1786
1787
          /* header value was empty */
1788
40.7k
          MARK(header_value);
1789
40.7k
          UPDATE_STATE(s_header_field_start);
1790
40.7k
          CALLBACK_DATA_NOADVANCE(header_value);
1791
40.7k
          REEXECUTE();
1792
0
        }
1793
45.3k
      }
1794
1795
14.4k
      case s_headers_almost_done:
1796
14.4k
      {
1797
14.4k
        STRICT_CHECK(ch != LF);
1798
1799
14.4k
        if (parser->flags & F_TRAILING) {
1800
          /* End of a chunked request */
1801
343
          UPDATE_STATE(s_message_done);
1802
343
          CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1803
343
          REEXECUTE();
1804
0
        }
1805
1806
        /* Cannot use transfer-encoding and a content-length header together
1807
           per the HTTP specification. (RFC 7230 Section 3.3.3) */
1808
14.0k
        if ((parser->uses_transfer_encoding == 1) &&
1809
14.0k
            (parser->flags & F_CONTENTLENGTH)) {
1810
          /* Allow it for lenient parsing as long as `Transfer-Encoding` is
1811
           * not `chunked` or allow_length_with_encoding is set
1812
           */
1813
2
          if (parser->flags & F_CHUNKED) {
1814
1
            if (!allow_chunked_length) {
1815
1
              SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1816
1
              goto error;
1817
1
            }
1818
1
          } else if (!lenient) {
1819
1
            SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1820
1
            goto error;
1821
1
          }
1822
2
        }
1823
1824
14.0k
        UPDATE_STATE(s_headers_done);
1825
1826
        /* Set this here so that on_headers_complete() callbacks can see it */
1827
14.0k
        if ((parser->flags & F_UPGRADE) &&
1828
14.0k
            (parser->flags & F_CONNECTION_UPGRADE)) {
1829
          /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1830
           * mandatory only when it is a 101 Switching Protocols response,
1831
           * otherwise it is purely informational, to announce support.
1832
           */
1833
219
          parser->upgrade =
1834
219
              (parser->type == HTTP_REQUEST || parser->status_code == 101);
1835
13.8k
        } else {
1836
13.8k
          parser->upgrade = (parser->method == HTTP_CONNECT);
1837
13.8k
        }
1838
1839
        /* Here we call the headers_complete callback. This is somewhat
1840
         * different than other callbacks because if the user returns 1, we
1841
         * will interpret that as saying that this message has no body. This
1842
         * is needed for the annoying case of receiving a response to a HEAD
1843
         * request.
1844
         *
1845
         * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1846
         * we have to simulate it by handling a change in errno below.
1847
         */
1848
14.0k
        if (settings->on_headers_complete) {
1849
0
          switch (settings->on_headers_complete(parser)) {
1850
0
            case 0:
1851
0
              break;
1852
1853
0
            case 2:
1854
0
              parser->upgrade = 1;
1855
1856
              /* fall through */
1857
0
            case 1:
1858
0
              parser->flags |= F_SKIPBODY;
1859
0
              break;
1860
1861
0
            default:
1862
0
              SET_ERRNO(HPE_CB_headers_complete);
1863
0
              RETURN(p - data); /* Error */
1864
0
          }
1865
0
        }
1866
1867
14.0k
        if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1868
0
          RETURN(p - data);
1869
0
        }
1870
1871
14.0k
        REEXECUTE();
1872
0
      }
1873
1874
14.0k
      case s_headers_done:
1875
14.0k
      {
1876
14.0k
        int hasBody;
1877
14.0k
        STRICT_CHECK(ch != LF);
1878
1879
14.0k
        parser->nread = 0;
1880
14.0k
        nread = 0;
1881
1882
14.0k
        hasBody = parser->flags & F_CHUNKED ||
1883
14.0k
          (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1884
14.0k
        if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1885
12
                                (parser->flags & F_SKIPBODY) || !hasBody)) {
1886
          /* Exit, the rest of the message is in a different protocol. */
1887
10
          UPDATE_STATE(NEW_MESSAGE());
1888
10
          CALLBACK_NOTIFY(message_complete);
1889
10
          RETURN((p - data) + 1);
1890
0
        }
1891
1892
14.0k
        if (parser->flags & F_SKIPBODY) {
1893
0
          UPDATE_STATE(NEW_MESSAGE());
1894
0
          CALLBACK_NOTIFY(message_complete);
1895
14.0k
        } else if (parser->flags & F_CHUNKED) {
1896
          /* chunked encoding - ignore Content-Length header,
1897
           * prepare for a chunk */
1898
877
          UPDATE_STATE(s_chunk_size_start);
1899
13.1k
        } else if (parser->uses_transfer_encoding == 1) {
1900
8
          if (parser->type == HTTP_REQUEST && !lenient) {
1901
            /* RFC 7230 3.3.3 */
1902
1903
            /* If a Transfer-Encoding header field
1904
             * is present in a request and the chunked transfer coding is not
1905
             * the final encoding, the message body length cannot be determined
1906
             * reliably; the server MUST respond with the 400 (Bad Request)
1907
             * status code and then close the connection.
1908
             */
1909
7
            SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
1910
7
            RETURN(p - data); /* Error */
1911
1
          } else {
1912
            /* RFC 7230 3.3.3 */
1913
1914
            /* If a Transfer-Encoding header field is present in a response and
1915
             * the chunked transfer coding is not the final encoding, the
1916
             * message body length is determined by reading the connection until
1917
             * it is closed by the server.
1918
             */
1919
1
            UPDATE_STATE(s_body_identity_eof);
1920
1
          }
1921
13.1k
        } else {
1922
13.1k
          if (parser->content_length == 0) {
1923
            /* Content-Length header given but zero: Content-Length: 0\r\n */
1924
604
            UPDATE_STATE(NEW_MESSAGE());
1925
604
            CALLBACK_NOTIFY(message_complete);
1926
12.5k
          } else if (parser->content_length != ULLONG_MAX) {
1927
            /* Content-Length header given and non-zero */
1928
614
            UPDATE_STATE(s_body_identity);
1929
11.9k
          } else {
1930
11.9k
            if (!http_message_needs_eof(parser)) {
1931
              /* Assume content-length 0 - read the next */
1932
11.9k
              UPDATE_STATE(NEW_MESSAGE());
1933
11.9k
              CALLBACK_NOTIFY(message_complete);
1934
11.9k
            } else {
1935
              /* Read body until EOF */
1936
46
              UPDATE_STATE(s_body_identity_eof);
1937
46
            }
1938
11.9k
          }
1939
13.1k
        }
1940
1941
14.0k
        break;
1942
14.0k
      }
1943
1944
14.0k
      case s_body_identity:
1945
498
      {
1946
498
        uint64_t to_read = MIN(parser->content_length,
1947
498
                               (uint64_t) ((data + len) - p));
1948
1949
498
        assert(parser->content_length != 0
1950
498
            && parser->content_length != ULLONG_MAX);
1951
1952
        /* The difference between advancing content_length and p is because
1953
         * the latter will automatically advance on the next loop iteration.
1954
         * Further, if content_length ends up at 0, we want to see the last
1955
         * byte again for our message complete callback.
1956
         */
1957
498
        MARK(body);
1958
498
        parser->content_length -= to_read;
1959
498
        p += to_read - 1;
1960
1961
498
        if (parser->content_length == 0) {
1962
309
          UPDATE_STATE(s_message_done);
1963
1964
          /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1965
           *
1966
           * The alternative to doing this is to wait for the next byte to
1967
           * trigger the data callback, just as in every other case. The
1968
           * problem with this is that this makes it difficult for the test
1969
           * harness to distinguish between complete-on-EOF and
1970
           * complete-on-length. It's not clear that this distinction is
1971
           * important for applications, but let's keep it for now.
1972
           */
1973
309
          CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1974
309
          REEXECUTE();
1975
0
        }
1976
1977
189
        break;
1978
498
      }
1979
1980
      /* read until EOF */
1981
189
      case s_body_identity_eof:
1982
1
        MARK(body);
1983
1
        p = data + len - 1;
1984
1985
1
        break;
1986
1987
652
      case s_message_done:
1988
652
        UPDATE_STATE(NEW_MESSAGE());
1989
652
        CALLBACK_NOTIFY(message_complete);
1990
652
        if (parser->upgrade) {
1991
          /* Exit, the rest of the message is in a different protocol. */
1992
1
          RETURN((p - data) + 1);
1993
0
        }
1994
651
        break;
1995
1996
2.28k
      case s_chunk_size_start:
1997
2.28k
      {
1998
2.28k
        assert(nread == 1);
1999
2.28k
        assert(parser->flags & F_CHUNKED);
2000
2001
2.28k
        unhex_val = unhex[(unsigned char)ch];
2002
2.28k
        if (UNLIKELY(unhex_val == -1)) {
2003
2
          SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
2004
2
          goto error;
2005
2
        }
2006
2007
2.28k
        parser->content_length = unhex_val;
2008
2.28k
        UPDATE_STATE(s_chunk_size);
2009
2.28k
        break;
2010
2.28k
      }
2011
2012
6.85k
      case s_chunk_size:
2013
6.85k
      {
2014
6.85k
        uint64_t t;
2015
2016
6.85k
        assert(parser->flags & F_CHUNKED);
2017
2018
6.85k
        if (ch == CR) {
2019
1.56k
          UPDATE_STATE(s_chunk_size_almost_done);
2020
1.56k
          break;
2021
1.56k
        }
2022
2023
5.29k
        unhex_val = unhex[(unsigned char)ch];
2024
2025
5.29k
        if (unhex_val == -1) {
2026
584
          if (ch == ';' || ch == ' ') {
2027
570
            UPDATE_STATE(s_chunk_parameters);
2028
570
            break;
2029
570
          }
2030
2031
14
          SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
2032
14
          goto error;
2033
584
        }
2034
2035
4.71k
        t = parser->content_length;
2036
4.71k
        t *= 16;
2037
4.71k
        t += unhex_val;
2038
2039
        /* Overflow? Test against a conservative limit for simplicity. */
2040
4.71k
        if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
2041
7
          SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
2042
7
          goto error;
2043
7
        }
2044
2045
4.70k
        parser->content_length = t;
2046
4.70k
        break;
2047
4.71k
      }
2048
2049
806
      case s_chunk_parameters:
2050
806
      {
2051
806
        assert(parser->flags & F_CHUNKED);
2052
        /* just ignore this shit. TODO check for overflow */
2053
806
        if (ch == CR) {
2054
537
          UPDATE_STATE(s_chunk_size_almost_done);
2055
537
          break;
2056
537
        }
2057
269
        break;
2058
806
      }
2059
2060
2.08k
      case s_chunk_size_almost_done:
2061
2.08k
      {
2062
2.08k
        assert(parser->flags & F_CHUNKED);
2063
2.08k
        STRICT_CHECK(ch != LF);
2064
2065
2.08k
        parser->nread = 0;
2066
2.08k
        nread = 0;
2067
2068
2.08k
        if (parser->content_length == 0) {
2069
348
          parser->flags |= F_TRAILING;
2070
348
          UPDATE_STATE(s_header_field_start);
2071
1.73k
        } else {
2072
1.73k
          UPDATE_STATE(s_chunk_data);
2073
1.73k
        }
2074
2.08k
        CALLBACK_NOTIFY(chunk_header);
2075
2.08k
        break;
2076
2.08k
      }
2077
2078
2.08k
      case s_chunk_data:
2079
1.62k
      {
2080
1.62k
        uint64_t to_read = MIN(parser->content_length,
2081
1.62k
                               (uint64_t) ((data + len) - p));
2082
2083
1.62k
        assert(parser->flags & F_CHUNKED);
2084
1.62k
        assert(parser->content_length != 0
2085
1.62k
            && parser->content_length != ULLONG_MAX);
2086
2087
        /* See the explanation in s_body_identity for why the content
2088
         * length and data pointers are managed this way.
2089
         */
2090
1.62k
        MARK(body);
2091
1.62k
        parser->content_length -= to_read;
2092
1.62k
        p += to_read - 1;
2093
2094
1.62k
        if (parser->content_length == 0) {
2095
1.43k
          UPDATE_STATE(s_chunk_data_almost_done);
2096
1.43k
        }
2097
2098
1.62k
        break;
2099
1.62k
      }
2100
2101
1.43k
      case s_chunk_data_almost_done:
2102
1.43k
        assert(parser->flags & F_CHUNKED);
2103
1.43k
        assert(parser->content_length == 0);
2104
1.43k
        STRICT_CHECK(ch != CR);
2105
1.43k
        UPDATE_STATE(s_chunk_data_done);
2106
1.43k
        CALLBACK_DATA(body);
2107
1.43k
        break;
2108
2109
1.43k
      case s_chunk_data_done:
2110
1.42k
        assert(parser->flags & F_CHUNKED);
2111
1.42k
        STRICT_CHECK(ch != LF);
2112
1.42k
        parser->nread = 0;
2113
1.42k
        nread = 0;
2114
1.42k
        UPDATE_STATE(s_chunk_size_start);
2115
1.42k
        CALLBACK_NOTIFY(chunk_complete);
2116
1.42k
        break;
2117
2118
1.42k
      default:
2119
0
        assert(0 && "unhandled state");
2120
0
        SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2121
0
        goto error;
2122
886k
    }
2123
886k
  }
2124
2125
  /* Run callbacks for any marks that we have leftover after we ran out of
2126
   * bytes. There should be at most one of these set, so it's OK to invoke
2127
   * them in series (unset marks will not result in callbacks).
2128
   *
2129
   * We use the NOADVANCE() variety of callbacks here because 'p' has already
2130
   * overflowed 'data' and this allows us to correct for the off-by-one that
2131
   * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2132
   * value that's in-bounds).
2133
   */
2134
2135
3.24k
  assert(((header_field_mark ? 1 : 0) +
2136
3.24k
          (header_value_mark ? 1 : 0) +
2137
3.24k
          (url_mark ? 1 : 0)  +
2138
3.24k
          (body_mark ? 1 : 0) +
2139
3.24k
          (status_mark ? 1 : 0)) <= 1);
2140
2141
3.24k
  CALLBACK_DATA_NOADVANCE(header_field);
2142
3.24k
  CALLBACK_DATA_NOADVANCE(header_value);
2143
3.24k
  CALLBACK_DATA_NOADVANCE(url);
2144
3.24k
  CALLBACK_DATA_NOADVANCE(body);
2145
3.24k
  CALLBACK_DATA_NOADVANCE(status);
2146
2147
3.24k
  RETURN(len);
2148
2149
1.06k
error:
2150
1.06k
  if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2151
0
    SET_ERRNO(HPE_UNKNOWN);
2152
0
  }
2153
2154
1.06k
  RETURN(p - data);
2155
0
}
2156
2157
2158
/* Does the parser need to see an EOF to find the end of the message? */
2159
int
2160
http_message_needs_eof (const http_parser *parser)
2161
11.9k
{
2162
11.9k
  if (parser->type == HTTP_REQUEST) {
2163
9.26k
    return 0;
2164
9.26k
  }
2165
2166
  /* See RFC 2616 section 4.4 */
2167
2.69k
  if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2168
2.69k
      parser->status_code == 204 ||     /* No Content */
2169
2.69k
      parser->status_code == 304 ||     /* Not Modified */
2170
2.69k
      parser->flags & F_SKIPBODY) {     /* response to a HEAD request */
2171
2.64k
    return 0;
2172
2.64k
  }
2173
2174
  /* RFC 7230 3.3.3, see `s_headers_almost_done` */
2175
46
  if ((parser->uses_transfer_encoding == 1) &&
2176
46
      (parser->flags & F_CHUNKED) == 0) {
2177
0
    return 1;
2178
0
  }
2179
2180
46
  if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2181
0
    return 0;
2182
0
  }
2183
2184
46
  return 1;
2185
46
}
2186
2187
2188
int
2189
http_should_keep_alive (const http_parser *parser)
2190
0
{
2191
0
  if (parser->http_major > 0 && parser->http_minor > 0) {
2192
    /* HTTP/1.1 */
2193
0
    if (parser->flags & F_CONNECTION_CLOSE) {
2194
0
      return 0;
2195
0
    }
2196
0
  } else {
2197
    /* HTTP/1.0 or earlier */
2198
0
    if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2199
0
      return 0;
2200
0
    }
2201
0
  }
2202
2203
0
  return !http_message_needs_eof(parser);
2204
0
}
2205
2206
2207
const char *
2208
http_method_str (enum http_method m)
2209
0
{
2210
0
  return ELEM_AT(method_strings, m, "<unknown>");
2211
0
}
2212
2213
const char *
2214
http_status_str (enum http_status s)
2215
0
{
2216
0
  switch (s) {
2217
0
#define XX(num, name, string) case HTTP_STATUS_##name: return #string;
2218
0
    HTTP_STATUS_MAP(XX)
2219
0
#undef XX
2220
0
    default: return "<unknown>";
2221
0
  }
2222
0
}
2223
2224
void
2225
http_parser_init (http_parser *parser, enum http_parser_type t)
2226
4.32k
{
2227
4.32k
  void *data = parser->data; /* preserve application data */
2228
4.32k
  memset(parser, 0, sizeof(*parser));
2229
4.32k
  parser->data = data;
2230
4.32k
  parser->type = t;
2231
4.32k
  parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2232
4.32k
  parser->http_errno = HPE_OK;
2233
4.32k
}
2234
2235
void
2236
http_parser_settings_init(http_parser_settings *settings)
2237
0
{
2238
0
  memset(settings, 0, sizeof(*settings));
2239
0
}
2240
2241
const char *
2242
0
http_errno_name(enum http_errno err) {
2243
0
  assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2244
0
  return http_strerror_tab[err].name;
2245
0
}
2246
2247
const char *
2248
0
http_errno_description(enum http_errno err) {
2249
0
  assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2250
0
  return http_strerror_tab[err].description;
2251
0
}
2252
2253
static enum http_host_state
2254
698k
http_parse_host_char(enum http_host_state s, const char ch) {
2255
698k
  switch(s) {
2256
262k
    case s_http_userinfo:
2257
262k
    case s_http_userinfo_start:
2258
262k
      if (ch == '@') {
2259
203
        return s_http_host_start;
2260
203
      }
2261
2262
262k
      if (IS_USERINFO_CHAR(ch)) {
2263
262k
        return s_http_userinfo;
2264
262k
      }
2265
6
      break;
2266
2267
576
    case s_http_host_start:
2268
576
      if (ch == '[') {
2269
193
        return s_http_host_v6_start;
2270
193
      }
2271
2272
383
      if (IS_HOST_CHAR(ch)) {
2273
238
        return s_http_host;
2274
238
      }
2275
2276
145
      break;
2277
2278
199k
    case s_http_host:
2279
199k
      if (IS_HOST_CHAR(ch)) {
2280
199k
        return s_http_host;
2281
199k
      }
2282
2283
    /* fall through */
2284
136
    case s_http_host_v6_end:
2285
136
      if (ch == ':') {
2286
113
        return s_http_host_port_start;
2287
113
      }
2288
2289
23
      break;
2290
2291
1.30k
    case s_http_host_v6:
2292
1.30k
      if (ch == ']') {
2293
8
        return s_http_host_v6_end;
2294
8
      }
2295
2296
    /* fall through */
2297
1.48k
    case s_http_host_v6_start:
2298
1.48k
      if (IS_HEX(ch) || ch == ':' || ch == '.') {
2299
1.35k
        return s_http_host_v6;
2300
1.35k
      }
2301
2302
138
      if (s == s_http_host_v6 && ch == '%') {
2303
94
        return s_http_host_v6_zone_start;
2304
94
      }
2305
44
      break;
2306
2307
1.56k
    case s_http_host_v6_zone:
2308
1.56k
      if (ch == ']') {
2309
1
        return s_http_host_v6_end;
2310
1
      }
2311
2312
    /* fall through */
2313
1.65k
    case s_http_host_v6_zone_start:
2314
      /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2315
1.65k
      if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2316
1.65k
          ch == '~') {
2317
1.64k
        return s_http_host_v6_zone;
2318
1.64k
      }
2319
15
      break;
2320
2321
232k
    case s_http_host_port:
2322
232k
    case s_http_host_port_start:
2323
232k
      if (IS_NUM(ch)) {
2324
232k
        return s_http_host_port;
2325
232k
      }
2326
2327
17
      break;
2328
2329
17
    default:
2330
0
      break;
2331
698k
  }
2332
250
  return s_http_host_dead;
2333
698k
}
2334
2335
static int
2336
760
http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2337
760
  enum http_host_state s;
2338
2339
760
  const char *p;
2340
760
  size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2341
2342
760
  assert(u->field_set & (1 << UF_HOST));
2343
2344
760
  u->field_data[UF_HOST].len = 0;
2345
2346
760
  s = found_at ? s_http_userinfo_start : s_http_host_start;
2347
2348
699k
  for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2349
698k
    enum http_host_state new_s = http_parse_host_char(s, *p);
2350
2351
698k
    if (new_s == s_http_host_dead) {
2352
250
      return 1;
2353
250
    }
2354
2355
698k
    switch(new_s) {
2356
199k
      case s_http_host:
2357
199k
        if (s != s_http_host) {
2358
238
          u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2359
238
        }
2360
199k
        u->field_data[UF_HOST].len++;
2361
199k
        break;
2362
2363
1.35k
      case s_http_host_v6:
2364
1.35k
        if (s != s_http_host_v6) {
2365
168
          u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2366
168
        }
2367
1.35k
        u->field_data[UF_HOST].len++;
2368
1.35k
        break;
2369
2370
94
      case s_http_host_v6_zone_start:
2371
1.73k
      case s_http_host_v6_zone:
2372
1.73k
        u->field_data[UF_HOST].len++;
2373
1.73k
        break;
2374
2375
232k
      case s_http_host_port:
2376
232k
        if (s != s_http_host_port) {
2377
68
          u->field_data[UF_PORT].off = (uint16_t)(p - buf);
2378
68
          u->field_data[UF_PORT].len = 0;
2379
68
          u->field_set |= (1 << UF_PORT);
2380
68
        }
2381
232k
        u->field_data[UF_PORT].len++;
2382
232k
        break;
2383
2384
262k
      case s_http_userinfo:
2385
262k
        if (s != s_http_userinfo) {
2386
189
          u->field_data[UF_USERINFO].off = (uint16_t)(p - buf);
2387
189
          u->field_data[UF_USERINFO].len = 0;
2388
189
          u->field_set |= (1 << UF_USERINFO);
2389
189
        }
2390
262k
        u->field_data[UF_USERINFO].len++;
2391
262k
        break;
2392
2393
518
      default:
2394
518
        break;
2395
698k
    }
2396
698k
    s = new_s;
2397
698k
  }
2398
2399
  /* Make sure we don't end somewhere unexpected */
2400
510
  switch (s) {
2401
176
    case s_http_host_start:
2402
177
    case s_http_host_v6_start:
2403
223
    case s_http_host_v6:
2404
226
    case s_http_host_v6_zone_start:
2405
301
    case s_http_host_v6_zone:
2406
331
    case s_http_host_port_start:
2407
331
    case s_http_userinfo:
2408
333
    case s_http_userinfo_start:
2409
333
      return 1;
2410
177
    default:
2411
177
      break;
2412
510
  }
2413
2414
177
  return 0;
2415
510
}
2416
2417
void
2418
1.00k
http_parser_url_init(struct http_parser_url *u) {
2419
1.00k
  memset(u, 0, sizeof(*u));
2420
1.00k
}
2421
2422
int
2423
http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2424
                      struct http_parser_url *u)
2425
2.01k
{
2426
2.01k
  enum state s;
2427
2.01k
  const char *p;
2428
2.01k
  enum http_parser_url_fields uf, old_uf;
2429
2.01k
  int found_at = 0;
2430
2431
2.01k
  if (buflen == 0) {
2432
0
    return 1;
2433
0
  }
2434
2435
2.01k
  u->port = u->field_set = 0;
2436
2.01k
  s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2437
2.01k
  old_uf = UF_MAX;
2438
2439
3.73M
  for (p = buf; p < buf + buflen; p++) {
2440
3.73M
    s = parse_url_char(s, *p);
2441
2442
    /* Figure out the next field that we're operating on */
2443
3.73M
    switch (s) {
2444
1.00k
      case s_dead:
2445
1.00k
        return 1;
2446
2447
      /* Skip delimeters */
2448
71
      case s_req_schema_slash:
2449
103
      case s_req_schema_slash_slash:
2450
125
      case s_req_server_start:
2451
258
      case s_req_query_string_start:
2452
883
      case s_req_fragment_start:
2453
883
        continue;
2454
2455
263k
      case s_req_schema:
2456
263k
        uf = UF_SCHEMA;
2457
263k
        break;
2458
2459
612
      case s_req_server_with_at:
2460
612
        found_at = 1;
2461
2462
      /* fall through */
2463
2.74M
      case s_req_server:
2464
2.74M
        uf = UF_HOST;
2465
2.74M
        break;
2466
2467
456k
      case s_req_path:
2468
456k
        uf = UF_PATH;
2469
456k
        break;
2470
2471
172k
      case s_req_query_string:
2472
172k
        uf = UF_QUERY;
2473
172k
        break;
2474
2475
92.3k
      case s_req_fragment:
2476
92.3k
        uf = UF_FRAGMENT;
2477
92.3k
        break;
2478
2479
0
      default:
2480
0
        assert(!"Unexpected state");
2481
0
        return 1;
2482
3.73M
    }
2483
2484
    /* Nothing's changed; soldier on */
2485
3.72M
    if (uf == old_uf) {
2486
3.72M
      u->field_data[uf].len++;
2487
3.72M
      continue;
2488
3.72M
    }
2489
2490
1.44k
    u->field_data[uf].off = (uint16_t)(p - buf);
2491
1.44k
    u->field_data[uf].len = 1;
2492
2493
1.44k
    u->field_set |= (1 << uf);
2494
1.44k
    old_uf = uf;
2495
1.44k
  }
2496
2497
  /* host must be present if there is a schema */
2498
  /* parsing http:///toto will fail */
2499
1.00k
  if ((u->field_set & (1 << UF_SCHEMA)) &&
2500
1.00k
      (u->field_set & (1 << UF_HOST)) == 0) {
2501
33
    return 1;
2502
33
  }
2503
2504
974
  if (u->field_set & (1 << UF_HOST)) {
2505
760
    if (http_parse_host(buf, u, found_at) != 0) {
2506
583
      return 1;
2507
583
    }
2508
760
  }
2509
2510
  /* CONNECT requests can only contain "hostname:port" */
2511
391
  if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2512
212
    return 1;
2513
212
  }
2514
2515
179
  if (u->field_set & (1 << UF_PORT)) {
2516
66
    uint16_t off;
2517
66
    uint16_t len;
2518
66
    const char* p;
2519
66
    const char* end;
2520
66
    unsigned long v;
2521
2522
66
    off = u->field_data[UF_PORT].off;
2523
66
    len = u->field_data[UF_PORT].len;
2524
66
    end = buf + off + len;
2525
2526
    /* NOTE: The characters are already validated and are in the [0-9] range */
2527
66
    assert((size_t) (off + len) <= buflen && "Port number overflow");
2528
66
    v = 0;
2529
1.87k
    for (p = buf + off; p < end; p++) {
2530
1.83k
      v *= 10;
2531
1.83k
      v += *p - '0';
2532
2533
      /* Ports have a max value of 2^16 */
2534
1.83k
      if (v > 0xffff) {
2535
28
        return 1;
2536
28
      }
2537
1.83k
    }
2538
2539
38
    u->port = (uint16_t) v;
2540
38
  }
2541
2542
151
  return 0;
2543
179
}
2544
2545
void
2546
0
http_parser_pause(http_parser *parser, int paused) {
2547
  /* Users should only be pausing/unpausing a parser that is not in an error
2548
   * state. In non-debug builds, there's not much that we can do about this
2549
   * other than ignore it.
2550
   */
2551
0
  if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2552
0
      HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2553
0
    uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
2554
0
    SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2555
0
  } else {
2556
0
    assert(0 && "Attempting to pause parser in error state");
2557
0
  }
2558
0
}
2559
2560
int
2561
0
http_body_is_final(const struct http_parser *parser) {
2562
0
    return parser->state == s_message_done;
2563
0
}
2564
2565
unsigned long
2566
0
http_parser_version(void) {
2567
0
  return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2568
0
         HTTP_PARSER_VERSION_MINOR * 0x00100 |
2569
0
         HTTP_PARSER_VERSION_PATCH * 0x00001;
2570
0
}
2571
2572
void
2573
0
http_parser_set_max_header_size(uint32_t size) {
2574
0
  max_header_size = size;
2575
0
}