Coverage Report

Created: 2026-01-10 06:38

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/http-parser/http_parser.c
Line
Count
Source
1
/* Copyright Joyent, Inc. and other Node contributors.
2
 *
3
 * Permission is hereby granted, free of charge, to any person obtaining a copy
4
 * of this software and associated documentation files (the "Software"), to
5
 * deal in the Software without restriction, including without limitation the
6
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7
 * sell copies of the Software, and to permit persons to whom the Software is
8
 * furnished to do so, subject to the following conditions:
9
 *
10
 * The above copyright notice and this permission notice shall be included in
11
 * all copies or substantial portions of the Software.
12
 *
13
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
 * IN THE SOFTWARE.
20
 */
21
#include "http_parser.h"
22
#include <assert.h>
23
#include <stddef.h>
24
#include <ctype.h>
25
#include <string.h>
26
#include <limits.h>
27
28
static uint32_t max_header_size = HTTP_MAX_HEADER_SIZE;
29
30
#ifndef ULLONG_MAX
31
# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
32
#endif
33
34
#ifndef MIN
35
165k
# define MIN(a,b) ((a) < (b) ? (a) : (b))
36
#endif
37
38
#ifndef ARRAY_SIZE
39
0
# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
40
#endif
41
42
#ifndef BIT_AT
43
# define BIT_AT(a, i)                                                \
44
76.9k
  (!!((unsigned int) (a)[(unsigned int) (i) >> 3] &                  \
45
76.9k
   (1 << ((unsigned int) (i) & 7))))
46
#endif
47
48
#ifndef ELEM_AT
49
0
# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
50
#endif
51
52
1.04k
#define SET_ERRNO(e)                                                 \
53
1.04k
do {                                                                 \
54
1.04k
  parser->nread = nread;                                             \
55
1.04k
  parser->http_errno = (e);                                          \
56
1.04k
} while(0)
57
58
1.32M
/* Helpers for the parser state that is cached in the local variable
 * `p_state` of the function using them.
 *
 * None of these definitions ends in a semicolon: the call site supplies it.
 * That keeps `if (x) UPDATE_STATE(y); else ...` well-formed and avoids
 * stray empty statements.
 */

/* Read the cached state. */
#define CURRENT_STATE() p_state

/* Overwrite the cached state with V (converted to enum state). */
#define UPDATE_STATE(V) (p_state = (enum state) (V))

/* Write the local `nread` counter and the cached state back into `parser`,
 * then return V from the enclosing function.
 */
#define RETURN(V)                                                    \
do {                                                                 \
  parser->nread = nread;                                             \
  parser->state = CURRENT_STATE();                                   \
  return (V);                                                        \
} while (0)

/* Jump to the `reexecute` label of the enclosing function. */
#define REEXECUTE()                                                  \
  goto reexecute
68
69
70
#ifdef __GNUC__
71
382k
# define LIKELY(X) __builtin_expect(!!(X), 1)
72
1.68M
# define UNLIKELY(X) __builtin_expect(!!(X), 0)
73
#else
74
# define LIKELY(X) (X)
75
# define UNLIKELY(X) (X)
76
#endif
77
78
79
/* Run the notify callback FOR, returning ER if it fails */
80
56.0k
#define CALLBACK_NOTIFY_(FOR, ER)                                    \
81
56.0k
do {                                                                 \
82
56.0k
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
83
56.0k
                                                                     \
84
56.0k
  if (LIKELY(settings->on_##FOR)) {                                  \
85
0
    parser->state = CURRENT_STATE();                                 \
86
0
    if (UNLIKELY(0 != settings->on_##FOR(parser))) {                 \
87
0
      SET_ERRNO(HPE_CB_##FOR);                                       \
88
0
    }                                                                \
89
0
    UPDATE_STATE(parser->state);                                     \
90
0
                                                                     \
91
0
    /* We either errored above or got paused; get out */             \
92
0
    if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {             \
93
0
      return (ER);                                                   \
94
0
    }                                                                \
95
0
  }                                                                  \
96
56.0k
} while (0)
97
98
/* Run the notify callback FOR and consume the current byte */
99
55.4k
#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)
100
101
/* Run the notify callback FOR and don't consume the current byte */
102
576
#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)
103
104
/* Run data callback FOR with LEN bytes, returning ER if it fails */
105
340k
#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
106
340k
do {                                                                 \
107
340k
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
108
340k
                                                                     \
109
340k
  if (FOR##_mark) {                                                  \
110
326k
    if (LIKELY(settings->on_##FOR)) {                                \
111
0
      parser->state = CURRENT_STATE();                               \
112
0
      if (UNLIKELY(0 !=                                              \
113
0
                   settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
114
0
        SET_ERRNO(HPE_CB_##FOR);                                     \
115
0
      }                                                              \
116
0
      UPDATE_STATE(parser->state);                                   \
117
0
                                                                     \
118
0
      /* We either errored above or got paused; get out */           \
119
0
      if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {           \
120
0
        return (ER);                                                 \
121
0
      }                                                              \
122
0
    }                                                                \
123
326k
    FOR##_mark = NULL;                                               \
124
326k
  }                                                                  \
125
340k
} while (0)
126
127
/* Run the data callback FOR and consume the current byte */
128
#define CALLBACK_DATA(FOR)                                           \
129
154k
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
130
131
/* Run the data callback FOR and don't consume the current byte */
132
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
133
184k
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
134
135
/* Set the mark FOR; non-destructive if mark is already set */
136
327k
#define MARK(FOR)                                                    \
137
327k
do {                                                                 \
138
327k
  if (!FOR##_mark) {                                                 \
139
327k
    FOR##_mark = p;                                                  \
140
327k
  }                                                                  \
141
327k
} while (0)
142
143
/* Don't allow the total size of the HTTP headers (including the status
144
 * line) to exceed max_header_size.  This check is here to protect
145
 * embedders against denial-of-service attacks where the attacker feeds
146
 * us a never-ending header that the embedder keeps buffering.
147
 *
148
 * This check is arguably the responsibility of embedders but we're doing
149
 * it on the embedder's behalf because most won't bother and this way we
150
 * make the web a little safer.  max_header_size is still far bigger
151
 * than any reasonable request or response so this should never affect
152
 * day-to-day operation.
153
 */
154
1.19M
#define COUNT_HEADER_SIZE(V)                                         \
155
1.19M
do {                                                                 \
156
1.19M
  nread += (uint32_t)(V);                                            \
157
1.19M
  if (UNLIKELY(nread > max_header_size)) {                           \
158
106
    SET_ERRNO(HPE_HEADER_OVERFLOW);                                  \
159
106
    goto error;                                                      \
160
106
  }                                                                  \
161
1.19M
} while (0)
162
163
164
35.1k
#define PROXY_CONNECTION "proxy-connection"
165
230k
#define CONNECTION "connection"
166
92.8k
#define CONTENT_LENGTH "content-length"
167
235k
#define TRANSFER_ENCODING "transfer-encoding"
168
113k
#define UPGRADE "upgrade"
169
47.5k
#define CHUNKED "chunked"
170
266k
#define KEEP_ALIVE "keep-alive"
171
104k
#define CLOSE "close"
172
173
174
static const char *method_strings[] =
175
  {
176
#define XX(num, name, string) #string,
177
  HTTP_METHOD_MAP(XX)
178
#undef XX
179
  };
180
181
182
/* Tokens as defined by rfc 2616. Also lowercases them.
183
 *        token       = 1*<any CHAR except CTLs or separators>
184
 *     separators     = "(" | ")" | "<" | ">" | "@"
185
 *                    | "," | ";" | ":" | "\" | <">
186
 *                    | "/" | "[" | "]" | "?" | "="
187
 *                    | "{" | "}" | SP | HT
188
 */
189
static const char tokens[256] = {
190
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
191
        0,       0,       0,       0,       0,       0,       0,       0,
192
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
193
        0,       0,       0,       0,       0,       0,       0,       0,
194
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
195
        0,       0,       0,       0,       0,       0,       0,       0,
196
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
197
        0,       0,       0,       0,       0,       0,       0,       0,
198
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
199
       ' ',     '!',      0,      '#',     '$',     '%',     '&',    '\'',
200
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
201
        0,       0,      '*',     '+',      0,      '-',     '.',      0,
202
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
203
       '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
204
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
205
       '8',     '9',      0,       0,       0,       0,       0,       0,
206
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
207
        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
208
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
209
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
210
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
211
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
212
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
213
       'x',     'y',     'z',      0,       0,       0,      '^',     '_',
214
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
215
       '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
216
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
217
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
218
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
219
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
220
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
221
       'x',     'y',     'z',      0,      '|',      0,      '~',       0 };
222
223
224
/* Maps a byte to the value of the hexadecimal digit it represents
 * ('0'-'9' -> 0..9, 'A'-'F' and 'a'-'f' -> 10..15) or to -1 when the byte
 * is not a hexadecimal digit.
 *
 * All 256 entries are spelled out on purpose: leaving the upper half to
 * implicit zero-initialisation would make every byte in 0x80-0xFF look
 * like the digit 0 instead of the -1 "not a digit" sentinel.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  /* 0x80 - 0xFF: never hexadecimal digits */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  };
234
235
236
#if HTTP_PARSER_STRICT
237
# define T(v) 0
238
#else
239
# define T(v) v
240
#endif
241
242
243
static const uint8_t normal_url_char[32] = {
244
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
245
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
246
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
247
        0    | T(2)   |   0    |   0    | T(16)  |   0    |   0    |   0,
248
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
249
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
250
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
251
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
252
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
253
        0    |   2    |   4    |   0    |   16   |   32   |   64   |  128,
254
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
255
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
256
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
257
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
258
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
259
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0,
260
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
261
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
262
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
263
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
264
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
265
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
266
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
267
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
268
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
269
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
270
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
271
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
272
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
273
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
274
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
275
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0, };
276
277
#undef T
278
279
enum state
280
  { s_dead = 1 /* important that this is > 0 */
281
282
  , s_start_req_or_res
283
  , s_res_or_resp_H
284
  , s_start_res
285
  , s_res_H
286
  , s_res_HT
287
  , s_res_HTT
288
  , s_res_HTTP
289
  , s_res_http_major
290
  , s_res_http_dot
291
  , s_res_http_minor
292
  , s_res_http_end
293
  , s_res_first_status_code
294
  , s_res_status_code
295
  , s_res_status_start
296
  , s_res_status
297
  , s_res_line_almost_done
298
299
  , s_start_req
300
301
  , s_req_method
302
  , s_req_spaces_before_url
303
  , s_req_schema
304
  , s_req_schema_slash
305
  , s_req_schema_slash_slash
306
  , s_req_server_start
307
  , s_req_server
308
  , s_req_server_with_at
309
  , s_req_path
310
  , s_req_query_string_start
311
  , s_req_query_string
312
  , s_req_fragment_start
313
  , s_req_fragment
314
  , s_req_http_start
315
  , s_req_http_H
316
  , s_req_http_HT
317
  , s_req_http_HTT
318
  , s_req_http_HTTP
319
  , s_req_http_I
320
  , s_req_http_IC
321
  , s_req_http_major
322
  , s_req_http_dot
323
  , s_req_http_minor
324
  , s_req_http_end
325
  , s_req_line_almost_done
326
327
  , s_header_field_start
328
  , s_header_field
329
  , s_header_value_discard_ws
330
  , s_header_value_discard_ws_almost_done
331
  , s_header_value_discard_lws
332
  , s_header_value_start
333
  , s_header_value
334
  , s_header_value_lws
335
336
  , s_header_almost_done
337
338
  , s_chunk_size_start
339
  , s_chunk_size
340
  , s_chunk_parameters
341
  , s_chunk_size_almost_done
342
343
  , s_headers_almost_done
344
  , s_headers_done
345
346
  /* Important: 's_headers_done' must be the last 'header' state. All
347
   * states beyond this must be 'body' states. It is used for overflow
348
   * checking. See the PARSING_HEADER() macro.
349
   */
350
351
  , s_chunk_data
352
  , s_chunk_data_almost_done
353
  , s_chunk_data_done
354
355
  , s_body_identity
356
  , s_body_identity_eof
357
358
  , s_message_done
359
  };
360
361
362
983k
/* True while `state` is at or before s_headers_done in enum state order.
 * The argument is parenthesised so that compound expressions (for example
 * a conditional expression) are compared as a whole.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
363
364
365
enum header_states
366
  { h_general = 0
367
  , h_C
368
  , h_CO
369
  , h_CON
370
371
  , h_matching_connection
372
  , h_matching_proxy_connection
373
  , h_matching_content_length
374
  , h_matching_transfer_encoding
375
  , h_matching_upgrade
376
377
  , h_connection
378
  , h_content_length
379
  , h_content_length_num
380
  , h_content_length_ws
381
  , h_transfer_encoding
382
  , h_upgrade
383
384
  , h_matching_transfer_encoding_token_start
385
  , h_matching_transfer_encoding_chunked
386
  , h_matching_transfer_encoding_token
387
388
  , h_matching_connection_token_start
389
  , h_matching_connection_keep_alive
390
  , h_matching_connection_close
391
  , h_matching_connection_upgrade
392
  , h_matching_connection_token
393
394
  , h_transfer_encoding_chunked
395
  , h_connection_keep_alive
396
  , h_connection_close
397
  , h_connection_upgrade
398
  };
399
400
enum http_host_state
401
  {
402
    s_http_host_dead = 1
403
  , s_http_userinfo_start
404
  , s_http_userinfo
405
  , s_http_host_start
406
  , s_http_host_v6_start
407
  , s_http_host
408
  , s_http_host_v6
409
  , s_http_host_v6_end
410
  , s_http_host_v6_zone_start
411
  , s_http_host_v6_zone
412
  , s_http_host_port_start
413
  , s_http_host_port
414
};
415
416
/* Macros for character classes; depends on strict-mode  */
417
62.3M
#define CR                  '\r'
#define LF                  '\n'

/* Every macro below parenthesises its parameter so that a compound
 * argument (e.g. `x ? 'A' : 'B'`) is classified as a whole.
 */

/* Set bit 0x20: maps 'A'-'Z' onto 'a'-'z'. Other bytes are changed too, so
 * the result is only meaningful when compared against lowercase letters.
 */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* Token lookup that additionally rejects the space character. */
#define STRICT_TOKEN(c)     (((c) == ' ') ? 0 : tokens[(unsigned char)(c)])

#if HTTP_PARSER_STRICT
#define TOKEN(c)            STRICT_TOKEN(c)
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
#define TOKEN(c)            tokens[(unsigned char)(c)]
/* Non-strict mode also accepts any byte with the high bit set. */
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
/* Non-strict mode also accepts '_' in host names. */
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif

/**
 * Verify that a char is a valid visible (printable) US-ASCII
 * character or %x80-FF. CR, LF and horizontal tab (9) are accepted as well;
 * DEL (127) is rejected.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))
451
452
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
453
454
455
#if HTTP_PARSER_STRICT
456
# define STRICT_CHECK(cond)                                          \
457
do {                                                                 \
458
  if (cond) {                                                        \
459
    SET_ERRNO(HPE_STRICT);                                           \
460
    goto error;                                                      \
461
  }                                                                  \
462
} while (0)
463
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
464
#else
465
# define STRICT_CHECK(cond)
466
# define NEW_MESSAGE() start_state
467
#endif
468
469
470
/* Map errno values to strings for human-readable output */
471
#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
472
static struct {
473
  const char *name;
474
  const char *description;
475
} http_strerror_tab[] = {
476
  HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
477
};
478
#undef HTTP_STRERROR_GEN
479
480
int http_message_needs_eof(const http_parser *parser);
481
482
/* Our URL parser.
483
 *
484
 * This is designed to be shared by http_parser_execute() for URL validation,
485
 * hence it has a state transition + byte-for-byte interface. In addition, it
486
 * is meant to be embedded in http_parser_parse_url(), which does the dirty
487
 * work of turning state transitions into URL components for its API.
488
 *
489
 * This function should only be invoked with non-space characters. It is
490
 * assumed that the caller cares about (and can detect) the transition between
491
 * URL and non-URL states by looking for these.
492
 */
493
static enum state
494
parse_url_char(enum state s, const char ch)
495
98.1k
{
496
98.1k
  if (ch == ' ' || ch == '\r' || ch == '\n') {
497
4
    return s_dead;
498
4
  }
499
500
#if HTTP_PARSER_STRICT
501
  if (ch == '\t' || ch == '\f') {
502
    return s_dead;
503
  }
504
#endif
505
506
98.1k
  switch (s) {
507
25.2k
    case s_req_spaces_before_url:
508
      /* Proxied requests are followed by scheme of an absolute URI (alpha).
509
       * All methods except CONNECT are followed by '/' or '*'.
510
       */
511
512
25.2k
      if (ch == '/' || ch == '*') {
513
18.0k
        return s_req_path;
514
18.0k
      }
515
516
7.20k
      if (IS_ALPHA(ch)) {
517
7.17k
        return s_req_schema;
518
7.17k
      }
519
520
35
      break;
521
522
7.35k
    case s_req_schema:
523
7.35k
      if (IS_ALPHA(ch)) {
524
203
        return s;
525
203
      }
526
527
7.14k
      if (ch == ':') {
528
7.12k
        return s_req_schema_slash;
529
7.12k
      }
530
531
22
      break;
532
533
7.12k
    case s_req_schema_slash:
534
7.12k
      if (ch == '/') {
535
7.10k
        return s_req_schema_slash_slash;
536
7.10k
      }
537
538
13
      break;
539
540
7.10k
    case s_req_schema_slash_slash:
541
7.10k
      if (ch == '/') {
542
7.08k
        return s_req_server_start;
543
7.08k
      }
544
545
14
      break;
546
547
436
    case s_req_server_with_at:
548
436
      if (ch == '@') {
549
1
        return s_dead;
550
1
      }
551
552
    /* fall through */
553
7.52k
    case s_req_server_start:
554
12.8k
    case s_req_server:
555
12.8k
      if (ch == '/') {
556
237
        return s_req_path;
557
237
      }
558
559
12.5k
      if (ch == '?') {
560
6.59k
        return s_req_query_string_start;
561
6.59k
      }
562
563
6.00k
      if (ch == '@') {
564
443
        return s_req_server_with_at;
565
443
      }
566
567
5.56k
      if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
568
5.52k
        return s_req_server;
569
5.52k
      }
570
571
40
      break;
572
573
9.04k
    case s_req_path:
574
9.04k
      if (IS_URL_CHAR(ch)) {
575
2.51k
        return s;
576
2.51k
      }
577
578
6.52k
      switch (ch) {
579
6.01k
        case '?':
580
6.01k
          return s_req_query_string_start;
581
582
507
        case '#':
583
507
          return s_req_fragment_start;
584
6.52k
      }
585
586
10
      break;
587
588
5.84k
    case s_req_query_string_start:
589
7.57k
    case s_req_query_string:
590
7.57k
      if (IS_URL_CHAR(ch)) {
591
1.70k
        return s_req_query_string;
592
1.70k
      }
593
594
5.87k
      switch (ch) {
595
207
        case '?':
596
          /* allow extra '?' in query string */
597
207
          return s_req_query_string;
598
599
5.64k
        case '#':
600
5.64k
          return s_req_fragment_start;
601
5.87k
      }
602
603
18
      break;
604
605
6.21k
    case s_req_fragment_start:
606
6.21k
      if (IS_URL_CHAR(ch)) {
607
1.86k
        return s_req_fragment;
608
1.86k
      }
609
610
4.34k
      switch (ch) {
611
4.13k
        case '?':
612
4.13k
          return s_req_fragment;
613
614
202
        case '#':
615
202
          return s;
616
4.34k
      }
617
618
8
      break;
619
620
15.6k
    case s_req_fragment:
621
15.6k
      if (IS_URL_CHAR(ch)) {
622
11.3k
        return s;
623
11.3k
      }
624
625
4.33k
      switch (ch) {
626
140
        case '?':
627
4.32k
        case '#':
628
4.32k
          return s;
629
4.33k
      }
630
631
8
      break;
632
633
8
    default:
634
0
      break;
635
98.1k
  }
636
637
  /* We should never fall out of the switch above unless there's an error */
638
168
  return s_dead;
639
98.1k
}
640
641
size_t http_parser_execute (http_parser *parser,
642
                            const http_parser_settings *settings,
643
                            const char *data,
644
                            size_t len)
645
4.26k
{
646
4.26k
  char c, ch;
647
4.26k
  int8_t unhex_val;
648
4.26k
  const char *p = data;
649
4.26k
  const char *header_field_mark = 0;
650
4.26k
  const char *header_value_mark = 0;
651
4.26k
  const char *url_mark = 0;
652
4.26k
  const char *body_mark = 0;
653
4.26k
  const char *status_mark = 0;
654
4.26k
  enum state p_state = (enum state) parser->state;
655
4.26k
  const unsigned int lenient = parser->lenient_http_headers;
656
4.26k
  const unsigned int allow_chunked_length = parser->allow_chunked_length;
657
658
4.26k
  uint32_t nread = parser->nread;
659
660
  /* We're in an error state. Don't bother doing anything. */
661
4.26k
  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
662
0
    return 0;
663
0
  }
664
665
4.26k
  if (len == 0) {
666
0
    switch (CURRENT_STATE()) {
667
0
      case s_body_identity_eof:
668
        /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
669
         * we got paused.
670
         */
671
0
        CALLBACK_NOTIFY_NOADVANCE(message_complete);
672
0
        return 0;
673
674
0
      case s_dead:
675
0
      case s_start_req_or_res:
676
0
      case s_start_res:
677
0
      case s_start_req:
678
0
        return 0;
679
680
0
      default:
681
0
        SET_ERRNO(HPE_INVALID_EOF_STATE);
682
0
        return 1;
683
0
    }
684
0
  }
685
686
687
4.26k
  if (CURRENT_STATE() == s_header_field)
688
0
    header_field_mark = data;
689
4.26k
  if (CURRENT_STATE() == s_header_value)
690
0
    header_value_mark = data;
691
4.26k
  switch (CURRENT_STATE()) {
692
0
  case s_req_path:
693
0
  case s_req_schema:
694
0
  case s_req_schema_slash:
695
0
  case s_req_schema_slash_slash:
696
0
  case s_req_server_start:
697
0
  case s_req_server:
698
0
  case s_req_server_with_at:
699
0
  case s_req_query_string_start:
700
0
  case s_req_query_string:
701
0
  case s_req_fragment_start:
702
0
  case s_req_fragment:
703
0
    url_mark = data;
704
0
    break;
705
0
  case s_res_status:
706
0
    status_mark = data;
707
0
    break;
708
4.26k
  default:
709
4.26k
    break;
710
4.26k
  }
711
712
987k
  for (p=data; p != data + len; p++) {
713
983k
    ch = *p;
714
715
983k
    if (PARSING_HEADER(CURRENT_STATE()))
716
979k
      COUNT_HEADER_SIZE(1);
717
718
1.30M
reexecute:
719
1.30M
    switch (CURRENT_STATE()) {
720
721
0
      case s_dead:
722
        /* this state is used after a 'Connection: close' message
723
         * the parser will error out if it reads another message
724
         */
725
0
        if (LIKELY(ch == CR || ch == LF))
726
0
          break;
727
728
0
        SET_ERRNO(HPE_CLOSED_CONNECTION);
729
0
        goto error;
730
731
4.63k
      case s_start_req_or_res:
732
4.63k
      {
733
4.63k
        if (ch == CR || ch == LF)
734
389
          break;
735
4.24k
        parser->flags = 0;
736
4.24k
        parser->uses_transfer_encoding = 0;
737
4.24k
        parser->content_length = ULLONG_MAX;
738
739
4.24k
        if (ch == 'H') {
740
397
          UPDATE_STATE(s_res_or_resp_H);
741
742
397
          CALLBACK_NOTIFY(message_begin);
743
3.84k
        } else {
744
3.84k
          parser->type = HTTP_REQUEST;
745
3.84k
          UPDATE_STATE(s_start_req);
746
3.84k
          REEXECUTE();
747
0
        }
748
749
397
        break;
750
4.24k
      }
751
752
397
      case s_res_or_resp_H:
753
396
        if (ch == 'T') {
754
375
          parser->type = HTTP_RESPONSE;
755
375
          UPDATE_STATE(s_res_HT);
756
375
        } else {
757
21
          if (UNLIKELY(ch != 'E')) {
758
20
            SET_ERRNO(HPE_INVALID_CONSTANT);
759
20
            goto error;
760
20
          }
761
762
1
          parser->type = HTTP_REQUEST;
763
1
          parser->method = HTTP_HEAD;
764
1
          parser->index = 2;
765
1
          UPDATE_STATE(s_req_method);
766
1
        }
767
376
        break;
768
769
3.94k
      case s_start_res:
770
3.94k
      {
771
3.94k
        if (ch == CR || ch == LF)
772
1.85k
          break;
773
2.09k
        parser->flags = 0;
774
2.09k
        parser->uses_transfer_encoding = 0;
775
2.09k
        parser->content_length = ULLONG_MAX;
776
777
2.09k
        if (ch == 'H') {
778
2.06k
          UPDATE_STATE(s_res_H);
779
2.06k
        } else {
780
21
          SET_ERRNO(HPE_INVALID_CONSTANT);
781
21
          goto error;
782
21
        }
783
784
2.06k
        CALLBACK_NOTIFY(message_begin);
785
2.06k
        break;
786
2.06k
      }
787
788
2.06k
      case s_res_H:
789
2.06k
        STRICT_CHECK(ch != 'T');
790
2.06k
        UPDATE_STATE(s_res_HT);
791
2.06k
        break;
792
793
2.42k
      case s_res_HT:
794
2.42k
        STRICT_CHECK(ch != 'T');
795
2.42k
        UPDATE_STATE(s_res_HTT);
796
2.42k
        break;
797
798
2.42k
      case s_res_HTT:
799
2.42k
        STRICT_CHECK(ch != 'P');
800
2.42k
        UPDATE_STATE(s_res_HTTP);
801
2.42k
        break;
802
803
2.41k
      case s_res_HTTP:
804
2.41k
        STRICT_CHECK(ch != '/');
805
2.41k
        UPDATE_STATE(s_res_http_major);
806
2.41k
        break;
807
808
2.41k
      case s_res_http_major:
809
2.41k
        if (UNLIKELY(!IS_NUM(ch))) {
810
15
          SET_ERRNO(HPE_INVALID_VERSION);
811
15
          goto error;
812
15
        }
813
814
2.39k
        parser->http_major = ch - '0';
815
2.39k
        UPDATE_STATE(s_res_http_dot);
816
2.39k
        break;
817
818
2.38k
      case s_res_http_dot:
819
2.38k
      {
820
2.38k
        if (UNLIKELY(ch != '.')) {
821
12
          SET_ERRNO(HPE_INVALID_VERSION);
822
12
          goto error;
823
12
        }
824
825
2.37k
        UPDATE_STATE(s_res_http_minor);
826
2.37k
        break;
827
2.38k
      }
828
829
2.37k
      case s_res_http_minor:
830
2.37k
        if (UNLIKELY(!IS_NUM(ch))) {
831
15
          SET_ERRNO(HPE_INVALID_VERSION);
832
15
          goto error;
833
15
        }
834
835
2.35k
        parser->http_minor = ch - '0';
836
2.35k
        UPDATE_STATE(s_res_http_end);
837
2.35k
        break;
838
839
2.34k
      case s_res_http_end:
840
2.34k
      {
841
2.34k
        if (UNLIKELY(ch != ' ')) {
842
7
          SET_ERRNO(HPE_INVALID_VERSION);
843
7
          goto error;
844
7
        }
845
846
2.33k
        UPDATE_STATE(s_res_first_status_code);
847
2.33k
        break;
848
2.34k
      }
849
850
2.52k
      case s_res_first_status_code:
851
2.52k
      {
852
2.52k
        if (!IS_NUM(ch)) {
853
215
          if (ch == ' ') {
854
196
            break;
855
196
          }
856
857
19
          SET_ERRNO(HPE_INVALID_STATUS);
858
19
          goto error;
859
215
        }
860
2.30k
        parser->status_code = ch - '0';
861
2.30k
        UPDATE_STATE(s_res_status_code);
862
2.30k
        break;
863
2.52k
      }
864
865
6.57k
      case s_res_status_code:
866
6.57k
      {
867
6.57k
        if (!IS_NUM(ch)) {
868
2.27k
          switch (ch) {
869
554
            case ' ':
870
554
              UPDATE_STATE(s_res_status_start);
871
554
              break;
872
221
            case CR:
873
1.70k
            case LF:
874
1.70k
              UPDATE_STATE(s_res_status_start);
875
1.70k
              REEXECUTE();
876
0
              break;
877
16
            default:
878
16
              SET_ERRNO(HPE_INVALID_STATUS);
879
16
              goto error;
880
2.27k
          }
881
554
          break;
882
2.27k
        }
883
884
4.29k
        parser->status_code *= 10;
885
4.29k
        parser->status_code += ch - '0';
886
887
4.29k
        if (UNLIKELY(parser->status_code > 999)) {
888
3
          SET_ERRNO(HPE_INVALID_STATUS);
889
3
          goto error;
890
3
        }
891
892
4.29k
        break;
893
4.29k
      }
894
895
4.29k
      case s_res_status_start:
896
2.25k
      {
897
2.25k
        MARK(status);
898
2.25k
        UPDATE_STATE(s_res_status);
899
2.25k
        parser->index = 0;
900
901
2.25k
        if (ch == CR || ch == LF)
902
1.88k
          REEXECUTE();
903
904
367
        break;
905
2.25k
      }
906
907
3.60k
      case s_res_status:
908
3.60k
        if (ch == CR) {
909
221
          UPDATE_STATE(s_res_line_almost_done);
910
221
          CALLBACK_DATA(status);
911
221
          break;
912
221
        }
913
914
3.38k
        if (ch == LF) {
915
1.99k
          UPDATE_STATE(s_header_field_start);
916
1.99k
          CALLBACK_DATA(status);
917
1.99k
          break;
918
1.99k
        }
919
920
1.39k
        break;
921
922
1.39k
      case s_res_line_almost_done:
923
217
        STRICT_CHECK(ch != LF);
924
217
        UPDATE_STATE(s_header_field_start);
925
217
        break;
926
927
27.2k
      case s_start_req:
928
27.2k
      {
929
27.2k
        if (ch == CR || ch == LF)
930
1.36k
          break;
931
25.9k
        parser->flags = 0;
932
25.9k
        parser->uses_transfer_encoding = 0;
933
25.9k
        parser->content_length = ULLONG_MAX;
934
935
25.9k
        if (UNLIKELY(!IS_ALPHA(ch))) {
936
24
          SET_ERRNO(HPE_INVALID_METHOD);
937
24
          goto error;
938
24
        }
939
940
25.8k
        parser->method = (enum http_method) 0;
941
25.8k
        parser->index = 1;
942
25.8k
        switch (ch) {
943
1.54k
          case 'A': parser->method = HTTP_ACL; break;
944
202
          case 'B': parser->method = HTTP_BIND; break;
945
493
          case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
946
204
          case 'D': parser->method = HTTP_DELETE; break;
947
17.1k
          case 'G': parser->method = HTTP_GET; break;
948
194
          case 'H': parser->method = HTTP_HEAD; break;
949
408
          case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
950
954
          case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
951
205
          case 'N': parser->method = HTTP_NOTIFY; break;
952
13
          case 'O': parser->method = HTTP_OPTIONS; break;
953
2.65k
          case 'P': parser->method = HTTP_POST;
954
            /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
955
2.65k
            break;
956
276
          case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
957
725
          case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
958
200
          case 'T': parser->method = HTTP_TRACE; break;
959
691
          case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
960
6
          default:
961
6
            SET_ERRNO(HPE_INVALID_METHOD);
962
6
            goto error;
963
25.8k
        }
964
25.8k
        UPDATE_STATE(s_req_method);
965
966
25.8k
        CALLBACK_NOTIFY(message_begin);
967
968
25.8k
        break;
969
25.8k
      }
970
971
91.2k
      case s_req_method:
972
91.2k
      {
973
91.2k
        const char *matcher;
974
91.2k
        if (UNLIKELY(ch == '\0')) {
975
1
          SET_ERRNO(HPE_INVALID_METHOD);
976
1
          goto error;
977
1
        }
978
979
91.2k
        matcher = method_strings[parser->method];
980
91.2k
        if (ch == ' ' && matcher[parser->index] == '\0') {
981
25.2k
          UPDATE_STATE(s_req_spaces_before_url);
982
65.9k
        } else if (ch == matcher[parser->index]) {
983
59.2k
          ; /* nada */
984
59.2k
        } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
985
986
6.69k
          switch (parser->method << 16 | parser->index << 8 | ch) {
987
0
#define XX(meth, pos, ch, new_meth) \
988
6.39k
            case (HTTP_##meth << 16 | pos << 8 | ch): \
989
6.39k
              parser->method = HTTP_##new_meth; break;
990
991
2.14k
            XX(POST,      1, 'U', PUT)
992
195
            XX(POST,      1, 'A', PATCH)
993
277
            XX(POST,      1, 'R', PROPFIND)
994
280
            XX(PUT,       2, 'R', PURGE)
995
195
            XX(CONNECT,   1, 'H', CHECKOUT)
996
254
            XX(CONNECT,   2, 'P', COPY)
997
261
            XX(MKCOL,     1, 'O', MOVE)
998
196
            XX(MKCOL,     1, 'E', MERGE)
999
253
            XX(MKCOL,     1, '-', MSEARCH)
1000
3
            XX(MKCOL,     2, 'A', MKACTIVITY)
1001
195
            XX(MKCOL,     3, 'A', MKCALENDAR)
1002
259
            XX(SUBSCRIBE, 1, 'E', SEARCH)
1003
414
            XX(SUBSCRIBE, 1, 'O', SOURCE)
1004
253
            XX(REPORT,    2, 'B', REBIND)
1005
194
            XX(PROPFIND,  4, 'P', PROPPATCH)
1006
381
            XX(LOCK,      1, 'I', LINK)
1007
196
            XX(UNLOCK,    2, 'S', UNSUBSCRIBE)
1008
250
            XX(UNLOCK,    2, 'B', UNBIND)
1009
200
            XX(UNLOCK,    3, 'I', UNLINK)
1010
0
#undef XX
1011
297
            default:
1012
297
              SET_ERRNO(HPE_INVALID_METHOD);
1013
297
              goto error;
1014
6.69k
          }
1015
6.69k
        } else {
1016
31
          SET_ERRNO(HPE_INVALID_METHOD);
1017
31
          goto error;
1018
31
        }
1019
1020
90.9k
        ++parser->index;
1021
90.9k
        break;
1022
91.2k
      }
1023
1024
25.4k
      case s_req_spaces_before_url:
1025
25.4k
      {
1026
25.4k
        if (ch == ' ') break;
1027
1028
25.2k
        MARK(url);
1029
25.2k
        if (parser->method == HTTP_CONNECT) {
1030
10
          UPDATE_STATE(s_req_server_start);
1031
10
        }
1032
1033
25.2k
        UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1034
25.2k
        if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1035
41
          SET_ERRNO(HPE_INVALID_URL);
1036
41
          goto error;
1037
41
        }
1038
1039
25.2k
        break;
1040
25.2k
      }
1041
1042
25.2k
      case s_req_schema:
1043
14.4k
      case s_req_schema_slash:
1044
21.5k
      case s_req_schema_slash_slash:
1045
28.6k
      case s_req_server_start:
1046
28.6k
      {
1047
28.6k
        switch (ch) {
1048
          /* No whitespace allowed here */
1049
1
          case ' ':
1050
2
          case CR:
1051
4
          case LF:
1052
4
            SET_ERRNO(HPE_INVALID_URL);
1053
4
            goto error;
1054
28.6k
          default:
1055
28.6k
            UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1056
28.6k
            if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1057
81
              SET_ERRNO(HPE_INVALID_URL);
1058
81
              goto error;
1059
81
            }
1060
28.6k
        }
1061
1062
28.5k
        break;
1063
28.6k
      }
1064
1065
28.5k
      case s_req_server:
1066
5.76k
      case s_req_server_with_at:
1067
26.5k
      case s_req_path:
1068
39.1k
      case s_req_query_string_start:
1069
40.9k
      case s_req_query_string:
1070
47.3k
      case s_req_fragment_start:
1071
68.9k
      case s_req_fragment:
1072
68.9k
      {
1073
68.9k
        switch (ch) {
1074
1.18k
          case ' ':
1075
1.18k
            UPDATE_STATE(s_req_http_start);
1076
1.18k
            CALLBACK_DATA(url);
1077
1.18k
            break;
1078
6.48k
          case CR:
1079
23.5k
          case LF:
1080
23.5k
            parser->http_major = 0;
1081
23.5k
            parser->http_minor = 9;
1082
23.5k
            UPDATE_STATE((ch == CR) ?
1083
23.5k
              s_req_line_almost_done :
1084
23.5k
              s_header_field_start);
1085
23.5k
            CALLBACK_DATA(url);
1086
23.5k
            break;
1087
44.2k
          default:
1088
44.2k
            UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1089
44.2k
            if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1090
51
              SET_ERRNO(HPE_INVALID_URL);
1091
51
              goto error;
1092
51
            }
1093
68.9k
        }
1094
68.8k
        break;
1095
68.9k
      }
1096
1097
68.8k
      case s_req_http_start:
1098
1.36k
        switch (ch) {
1099
195
          case ' ':
1100
195
            break;
1101
941
          case 'H':
1102
941
            UPDATE_STATE(s_req_http_H);
1103
941
            break;
1104
222
          case 'I':
1105
222
            if (parser->method == HTTP_SOURCE) {
1106
214
              UPDATE_STATE(s_req_http_I);
1107
214
              break;
1108
214
            }
1109
            /* fall through */
1110
10
          default:
1111
10
            SET_ERRNO(HPE_INVALID_CONSTANT);
1112
10
            goto error;
1113
1.36k
        }
1114
1.35k
        break;
1115
1116
1.35k
      case s_req_http_H:
1117
936
        STRICT_CHECK(ch != 'T');
1118
936
        UPDATE_STATE(s_req_http_HT);
1119
936
        break;
1120
1121
931
      case s_req_http_HT:
1122
931
        STRICT_CHECK(ch != 'T');
1123
931
        UPDATE_STATE(s_req_http_HTT);
1124
931
        break;
1125
1126
927
      case s_req_http_HTT:
1127
927
        STRICT_CHECK(ch != 'P');
1128
927
        UPDATE_STATE(s_req_http_HTTP);
1129
927
        break;
1130
1131
210
      case s_req_http_I:
1132
210
        STRICT_CHECK(ch != 'C');
1133
210
        UPDATE_STATE(s_req_http_IC);
1134
210
        break;
1135
1136
206
      case s_req_http_IC:
1137
206
        STRICT_CHECK(ch != 'E');
1138
206
        UPDATE_STATE(s_req_http_HTTP);  /* Treat "ICE" as "HTTP". */
1139
206
        break;
1140
1141
1.12k
      case s_req_http_HTTP:
1142
1.12k
        STRICT_CHECK(ch != '/');
1143
1.12k
        UPDATE_STATE(s_req_http_major);
1144
1.12k
        break;
1145
1146
1.11k
      case s_req_http_major:
1147
1.11k
        if (UNLIKELY(!IS_NUM(ch))) {
1148
15
          SET_ERRNO(HPE_INVALID_VERSION);
1149
15
          goto error;
1150
15
        }
1151
1152
1.09k
        parser->http_major = ch - '0';
1153
1.09k
        UPDATE_STATE(s_req_http_dot);
1154
1.09k
        break;
1155
1156
1.08k
      case s_req_http_dot:
1157
1.08k
      {
1158
1.08k
        if (UNLIKELY(ch != '.')) {
1159
12
          SET_ERRNO(HPE_INVALID_VERSION);
1160
12
          goto error;
1161
12
        }
1162
1163
1.07k
        UPDATE_STATE(s_req_http_minor);
1164
1.07k
        break;
1165
1.08k
      }
1166
1167
1.07k
      case s_req_http_minor:
1168
1.07k
        if (UNLIKELY(!IS_NUM(ch))) {
1169
11
          SET_ERRNO(HPE_INVALID_VERSION);
1170
11
          goto error;
1171
11
        }
1172
1173
1.06k
        parser->http_minor = ch - '0';
1174
1.06k
        UPDATE_STATE(s_req_http_end);
1175
1.06k
        break;
1176
1177
1.05k
      case s_req_http_end:
1178
1.05k
      {
1179
1.05k
        if (ch == CR) {
1180
194
          UPDATE_STATE(s_req_line_almost_done);
1181
194
          break;
1182
194
        }
1183
1184
861
        if (ch == LF) {
1185
849
          UPDATE_STATE(s_header_field_start);
1186
849
          break;
1187
849
        }
1188
1189
12
        SET_ERRNO(HPE_INVALID_VERSION);
1190
12
        goto error;
1191
0
        break;
1192
861
      }
1193
1194
      /* end of request line */
1195
6.66k
      case s_req_line_almost_done:
1196
6.66k
      {
1197
6.66k
        if (UNLIKELY(ch != LF)) {
1198
2
          SET_ERRNO(HPE_LF_EXPECTED);
1199
2
          goto error;
1200
2
        }
1201
1202
6.65k
        UPDATE_STATE(s_header_field_start);
1203
6.65k
        break;
1204
6.66k
      }
1205
1206
150k
      case s_header_field_start:
1207
150k
      {
1208
150k
        if (ch == CR) {
1209
256
          UPDATE_STATE(s_headers_almost_done);
1210
256
          break;
1211
256
        }
1212
1213
150k
        if (ch == LF) {
1214
          /* they might be just sending \n instead of \r\n so this would be
1215
           * the second \n to denote the end of headers*/
1216
25.5k
          UPDATE_STATE(s_headers_almost_done);
1217
25.5k
          REEXECUTE();
1218
0
        }
1219
1220
125k
        c = TOKEN(ch);
1221
1222
125k
        if (UNLIKELY(!c)) {
1223
44
          SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1224
44
          goto error;
1225
44
        }
1226
1227
124k
        MARK(header_field);
1228
1229
124k
        parser->index = 0;
1230
124k
        UPDATE_STATE(s_header_field);
1231
1232
124k
        switch (c) {
1233
25.7k
          case 'c':
1234
25.7k
            parser->header_state = h_C;
1235
25.7k
            break;
1236
1237
9.54k
          case 'p':
1238
9.54k
            parser->header_state = h_matching_proxy_connection;
1239
9.54k
            break;
1240
1241
6.34k
          case 't':
1242
6.34k
            parser->header_state = h_matching_transfer_encoding;
1243
6.34k
            break;
1244
1245
8.72k
          case 'u':
1246
8.72k
            parser->header_state = h_matching_upgrade;
1247
8.72k
            break;
1248
1249
74.6k
          default:
1250
74.6k
            parser->header_state = h_general;
1251
74.6k
            break;
1252
124k
        }
1253
124k
        break;
1254
124k
      }
1255
1256
124k
      case s_header_field:
1257
124k
      {
1258
124k
        const char* start = p;
1259
521k
        for (; p != data + len; p++) {
1260
520k
          ch = *p;
1261
520k
          c = TOKEN(ch);
1262
1263
520k
          if (!c)
1264
124k
            break;
1265
1266
396k
          switch (parser->header_state) {
1267
92.0k
            case h_general: {
1268
92.0k
              size_t left = data + len - p;
1269
92.0k
              const char* pe = p + MIN(left, max_header_size);
1270
5.61M
              while (p+1 < pe && TOKEN(p[1])) {
1271
5.52M
                p++;
1272
5.52M
              }
1273
92.0k
              break;
1274
0
            }
1275
1276
25.0k
            case h_C:
1277
25.0k
              parser->index++;
1278
25.0k
              parser->header_state = (c == 'o' ? h_CO : h_general);
1279
25.0k
              break;
1280
1281
24.1k
            case h_CO:
1282
24.1k
              parser->index++;
1283
24.1k
              parser->header_state = (c == 'n' ? h_CON : h_general);
1284
24.1k
              break;
1285
1286
22.7k
            case h_CON:
1287
22.7k
              parser->index++;
1288
22.7k
              switch (c) {
1289
17.8k
                case 'n':
1290
17.8k
                  parser->header_state = h_matching_connection;
1291
17.8k
                  break;
1292
3.93k
                case 't':
1293
3.93k
                  parser->header_state = h_matching_content_length;
1294
3.93k
                  break;
1295
967
                default:
1296
967
                  parser->header_state = h_general;
1297
967
                  break;
1298
22.7k
              }
1299
22.7k
              break;
1300
1301
            /* connection */
1302
1303
78.8k
            case h_matching_connection:
1304
78.8k
              parser->index++;
1305
78.8k
              if (parser->index > sizeof(CONNECTION)-1
1306
78.8k
                  || c != CONNECTION[parser->index]) {
1307
6.41k
                parser->header_state = h_general;
1308
72.3k
              } else if (parser->index == sizeof(CONNECTION)-2) {
1309
10.8k
                parser->header_state = h_connection;
1310
10.8k
              }
1311
78.8k
              break;
1312
1313
            /* proxy-connection */
1314
1315
14.6k
            case h_matching_proxy_connection:
1316
14.6k
              parser->index++;
1317
14.6k
              if (parser->index > sizeof(PROXY_CONNECTION)-1
1318
14.6k
                  || c != PROXY_CONNECTION[parser->index]) {
1319
8.89k
                parser->header_state = h_general;
1320
8.89k
              } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1321
288
                parser->header_state = h_connection;
1322
288
              }
1323
14.6k
              break;
1324
1325
            /* content-length */
1326
1327
31.2k
            case h_matching_content_length:
1328
31.2k
              parser->index++;
1329
31.2k
              if (parser->index > sizeof(CONTENT_LENGTH)-1
1330
31.2k
                  || c != CONTENT_LENGTH[parser->index]) {
1331
980
                parser->header_state = h_general;
1332
30.2k
              } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1333
2.50k
                parser->header_state = h_content_length;
1334
2.50k
              }
1335
31.2k
              break;
1336
1337
            /* transfer-encoding */
1338
1339
78.8k
            case h_matching_transfer_encoding:
1340
78.8k
              parser->index++;
1341
78.8k
              if (parser->index > sizeof(TRANSFER_ENCODING)-1
1342
78.8k
                  || c != TRANSFER_ENCODING[parser->index]) {
1343
1.52k
                parser->header_state = h_general;
1344
77.3k
              } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1345
4.34k
                parser->header_state = h_transfer_encoding;
1346
4.34k
                parser->uses_transfer_encoding = 1;
1347
4.34k
              }
1348
78.8k
              break;
1349
1350
            /* upgrade */
1351
1352
23.6k
            case h_matching_upgrade:
1353
23.6k
              parser->index++;
1354
23.6k
              if (parser->index > sizeof(UPGRADE)-1
1355
23.6k
                  || c != UPGRADE[parser->index]) {
1356
2.68k
                parser->header_state = h_general;
1357
20.9k
              } else if (parser->index == sizeof(UPGRADE)-2) {
1358
3.34k
                parser->header_state = h_upgrade;
1359
3.34k
              }
1360
23.6k
              break;
1361
1362
517
            case h_connection:
1363
983
            case h_content_length:
1364
1.20k
            case h_transfer_encoding:
1365
4.79k
            case h_upgrade:
1366
4.79k
              if (ch != ' ') parser->header_state = h_general;
1367
4.79k
              break;
1368
1369
0
            default:
1370
0
              assert(0 && "Unknown header_state");
1371
0
              break;
1372
396k
          }
1373
396k
        }
1374
1375
124k
        if (p == data + len) {
1376
316
          --p;
1377
316
          COUNT_HEADER_SIZE(p - start);
1378
306
          break;
1379
316
        }
1380
1381
124k
        COUNT_HEADER_SIZE(p - start);
1382
1383
124k
        if (ch == ':') {
1384
124k
          UPDATE_STATE(s_header_value_discard_ws);
1385
124k
          CALLBACK_DATA(header_field);
1386
124k
          break;
1387
124k
        }
1388
1389
43
        SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1390
43
        goto error;
1391
124k
      }
1392
1393
175k
      case s_header_value_discard_ws:
1394
175k
        if (ch == ' ' || ch == '\t') break;
1395
1396
126k
        if (ch == CR) {
1397
1.27k
          UPDATE_STATE(s_header_value_discard_ws_almost_done);
1398
1.27k
          break;
1399
1.27k
        }
1400
1401
125k
        if (ch == LF) {
1402
77.9k
          UPDATE_STATE(s_header_value_discard_lws);
1403
77.9k
          break;
1404
77.9k
        }
1405
1406
        /* fall through */
1407
1408
94.8k
      case s_header_value_start:
1409
94.8k
      {
1410
94.8k
        MARK(header_value);
1411
1412
94.8k
        UPDATE_STATE(s_header_value);
1413
94.8k
        parser->index = 0;
1414
1415
94.8k
        c = LOWER(ch);
1416
1417
94.8k
        switch (parser->header_state) {
1418
929
          case h_upgrade:
1419
929
            parser->flags |= F_UPGRADE;
1420
929
            parser->header_state = h_general;
1421
929
            break;
1422
1423
4.24k
          case h_transfer_encoding:
1424
            /* looking for 'Transfer-Encoding: chunked' */
1425
4.24k
            if ('c' == c) {
1426
3.13k
              parser->header_state = h_matching_transfer_encoding_chunked;
1427
3.13k
            } else {
1428
1.11k
              parser->header_state = h_matching_transfer_encoding_token;
1429
1.11k
            }
1430
4.24k
            break;
1431
1432
          /* Multi-value `Transfer-Encoding` header */
1433
525
          case h_matching_transfer_encoding_token_start:
1434
525
            break;
1435
1436
2.22k
          case h_content_length:
1437
2.22k
            if (UNLIKELY(!IS_NUM(ch))) {
1438
16
              SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1439
16
              goto error;
1440
16
            }
1441
1442
2.20k
            if (parser->flags & F_CONTENTLENGTH) {
1443
1
              SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1444
1
              goto error;
1445
1
            }
1446
1447
2.20k
            parser->flags |= F_CONTENTLENGTH;
1448
2.20k
            parser->content_length = ch - '0';
1449
2.20k
            parser->header_state = h_content_length_num;
1450
2.20k
            break;
1451
1452
          /* when obsolete line folding is encountered for content length
1453
           * continue to the s_header_value state */
1454
387
          case h_content_length_ws:
1455
387
            break;
1456
1457
10.5k
          case h_connection:
1458
            /* looking for 'Connection: keep-alive' */
1459
10.5k
            if (c == 'k') {
1460
2.03k
              parser->header_state = h_matching_connection_keep_alive;
1461
            /* looking for 'Connection: close' */
1462
8.49k
            } else if (c == 'c') {
1463
3.00k
              parser->header_state = h_matching_connection_close;
1464
5.49k
            } else if (c == 'u') {
1465
2.49k
              parser->header_state = h_matching_connection_upgrade;
1466
3.00k
            } else {
1467
3.00k
              parser->header_state = h_matching_connection_token;
1468
3.00k
            }
1469
10.5k
            break;
1470
1471
          /* Multi-value `Connection` header */
1472
463
          case h_matching_connection_token_start:
1473
463
            break;
1474
1475
75.5k
          default:
1476
75.5k
            parser->header_state = h_general;
1477
75.5k
            break;
1478
94.8k
        }
1479
94.8k
        break;
1480
94.8k
      }
1481
1482
94.8k
      case s_header_value:
1483
94.6k
      {
1484
94.6k
        const char* start = p;
1485
94.6k
        enum header_states h_state = (enum header_states) parser->header_state;
1486
2.51M
        for (; p != data + len; p++) {
1487
2.50M
          ch = *p;
1488
2.50M
          if (ch == CR) {
1489
2.59k
            UPDATE_STATE(s_header_almost_done);
1490
2.59k
            parser->header_state = h_state;
1491
2.59k
            CALLBACK_DATA(header_value);
1492
2.59k
            break;
1493
2.59k
          }
1494
1495
2.50M
          if (ch == LF) {
1496
91.3k
            UPDATE_STATE(s_header_almost_done);
1497
91.3k
            COUNT_HEADER_SIZE(p - start);
1498
91.3k
            parser->header_state = h_state;
1499
91.3k
            CALLBACK_DATA_NOADVANCE(header_value);
1500
91.3k
            REEXECUTE();
1501
0
          }
1502
1503
2.41M
          if (!lenient && !IS_HEADER_CHAR(ch)) {
1504
15
            SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1505
15
            goto error;
1506
15
          }
1507
1508
2.41M
          c = LOWER(ch);
1509
1510
2.41M
          switch (h_state) {
1511
70.6k
            case h_general:
1512
70.6k
              {
1513
70.6k
                size_t left = data + len - p;
1514
70.6k
                const char* pe = p + MIN(left, max_header_size);
1515
1516
13.6M
                for (; p != pe; p++) {
1517
13.6M
                  ch = *p;
1518
13.6M
                  if (ch == CR || ch == LF) {
1519
70.4k
                    --p;
1520
70.4k
                    break;
1521
70.4k
                  }
1522
13.5M
                  if (!lenient && !IS_HEADER_CHAR(ch)) {
1523
25
                    SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1524
25
                    goto error;
1525
25
                  }
1526
13.5M
                }
1527
70.6k
                if (p == data + len)
1528
88
                  --p;
1529
70.6k
                break;
1530
70.6k
              }
1531
1532
0
            case h_connection:
1533
0
            case h_transfer_encoding:
1534
0
              assert(0 && "Shouldn't get here.");
1535
0
              break;
1536
1537
0
            case h_content_length:
1538
0
              if (ch == ' ') break;
1539
0
              h_state = h_content_length_num;
1540
              /* fall through */
1541
1542
7.31k
            case h_content_length_num:
1543
7.31k
            {
1544
7.31k
              uint64_t t;
1545
1546
7.31k
              if (ch == ' ') {
1547
214
                h_state = h_content_length_ws;
1548
214
                break;
1549
214
              }
1550
1551
7.09k
              if (UNLIKELY(!IS_NUM(ch))) {
1552
16
                SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1553
16
                parser->header_state = h_state;
1554
16
                goto error;
1555
16
              }
1556
1557
7.08k
              t = parser->content_length;
1558
7.08k
              t *= 10;
1559
7.08k
              t += ch - '0';
1560
1561
              /* Overflow? Test against a conservative limit for simplicity. */
1562
7.08k
              if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1563
4
                SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1564
4
                parser->header_state = h_state;
1565
4
                goto error;
1566
4
              }
1567
1568
7.07k
              parser->content_length = t;
1569
7.07k
              break;
1570
7.08k
            }
1571
1572
204
            case h_content_length_ws:
1573
204
              if (ch == ' ') break;
1574
10
              SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1575
10
              parser->header_state = h_state;
1576
10
              goto error;
1577
1578
            /* Transfer-Encoding: chunked */
1579
7.20k
            case h_matching_transfer_encoding_token_start:
1580
              /* looking for 'Transfer-Encoding: chunked' */
1581
7.20k
              if ('c' == c) {
1582
1.79k
                h_state = h_matching_transfer_encoding_chunked;
1583
5.41k
              } else if (STRICT_TOKEN(c)) {
1584
                /* TODO(indutny): similar code below does this, but why?
1585
                 * At the very least it seems to be inconsistent given that
1586
                 * h_matching_transfer_encoding_token does not check for
1587
                 * `STRICT_TOKEN`
1588
                 */
1589
3.33k
                h_state = h_matching_transfer_encoding_token;
1590
3.33k
              } else if (c == ' ' || c == '\t') {
1591
                /* Skip lws */
1592
1.47k
              } else {
1593
598
                h_state = h_general;
1594
598
              }
1595
7.20k
              break;
1596
1597
16.7k
            case h_matching_transfer_encoding_chunked:
1598
16.7k
              parser->index++;
1599
16.7k
              if (parser->index > sizeof(CHUNKED)-1
1600
16.7k
                  || c != CHUNKED[parser->index]) {
1601
2.54k
                h_state = h_matching_transfer_encoding_token;
1602
14.1k
              } else if (parser->index == sizeof(CHUNKED)-2) {
1603
2.08k
                h_state = h_transfer_encoding_chunked;
1604
2.08k
              }
1605
16.7k
              break;
1606
1607
500k
            case h_matching_transfer_encoding_token:
1608
500k
              if (ch == ',') {
1609
5.82k
                h_state = h_matching_transfer_encoding_token_start;
1610
5.82k
                parser->index = 0;
1611
5.82k
              }
1612
500k
              break;
1613
1614
51.1k
            case h_matching_connection_token_start:
1615
              /* looking for 'Connection: keep-alive' */
1616
51.1k
              if (c == 'k') {
1617
12.1k
                h_state = h_matching_connection_keep_alive;
1618
              /* looking for 'Connection: close' */
1619
38.9k
              } else if (c == 'c') {
1620
10.6k
                h_state = h_matching_connection_close;
1621
28.3k
              } else if (c == 'u') {
1622
2.00k
                h_state = h_matching_connection_upgrade;
1623
26.3k
              } else if (STRICT_TOKEN(c)) {
1624
8.39k
                h_state = h_matching_connection_token;
1625
17.9k
              } else if (c == ' ' || c == '\t') {
1626
                /* Skip lws */
1627
16.5k
              } else {
1628
1.41k
                h_state = h_general;
1629
1.41k
              }
1630
51.1k
              break;
1631
1632
            /* looking for 'Connection: keep-alive' */
1633
90.4k
            case h_matching_connection_keep_alive:
1634
90.4k
              parser->index++;
1635
90.4k
              if (parser->index > sizeof(KEEP_ALIVE)-1
1636
90.4k
                  || c != KEEP_ALIVE[parser->index]) {
1637
5.16k
                h_state = h_matching_connection_token;
1638
85.3k
              } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1639
8.73k
                h_state = h_connection_keep_alive;
1640
8.73k
              }
1641
90.4k
              break;
1642
1643
            /* looking for 'Connection: close' */
1644
38.2k
            case h_matching_connection_close:
1645
38.2k
              parser->index++;
1646
38.2k
              if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1647
9.62k
                h_state = h_matching_connection_token;
1648
28.5k
              } else if (parser->index == sizeof(CLOSE)-2) {
1649
3.73k
                h_state = h_connection_close;
1650
3.73k
              }
1651
38.2k
              break;
1652
1653
            /* looking for 'Connection: upgrade' */
1654
15.9k
            case h_matching_connection_upgrade:
1655
15.9k
              parser->index++;
1656
15.9k
              if (parser->index > sizeof(UPGRADE) - 1 ||
1657
15.9k
                  c != UPGRADE[parser->index]) {
1658
2.23k
                h_state = h_matching_connection_token;
1659
13.7k
              } else if (parser->index == sizeof(UPGRADE)-2) {
1660
2.00k
                h_state = h_connection_upgrade;
1661
2.00k
              }
1662
15.9k
              break;
1663
1664
1.59M
            case h_matching_connection_token:
1665
1.59M
              if (ch == ',') {
1666
26.3k
                h_state = h_matching_connection_token_start;
1667
26.3k
                parser->index = 0;
1668
26.3k
              }
1669
1.59M
              break;
1670
1671
2.55k
            case h_transfer_encoding_chunked:
1672
2.55k
              if (ch != ' ') h_state = h_matching_transfer_encoding_token;
1673
2.55k
              break;
1674
1675
9.25k
            case h_connection_keep_alive:
1676
16.6k
            case h_connection_close:
1677
17.4k
            case h_connection_upgrade:
1678
17.4k
              if (ch == ',') {
1679
8.31k
                if (h_state == h_connection_keep_alive) {
1680
6.76k
                  parser->flags |= F_CONNECTION_KEEP_ALIVE;
1681
6.76k
                } else if (h_state == h_connection_close) {
1682
1.09k
                  parser->flags |= F_CONNECTION_CLOSE;
1683
1.09k
                } else if (h_state == h_connection_upgrade) {
1684
449
                  parser->flags |= F_CONNECTION_UPGRADE;
1685
449
                }
1686
8.31k
                h_state = h_matching_connection_token_start;
1687
8.31k
                parser->index = 0;
1688
9.14k
              } else if (ch != ' ') {
1689
3.89k
                h_state = h_matching_connection_token;
1690
3.89k
              }
1691
17.4k
              break;
1692
1693
0
            default:
1694
0
              UPDATE_STATE(s_header_value);
1695
0
              h_state = h_general;
1696
0
              break;
1697
2.41M
          }
1698
2.41M
        }
1699
3.19k
        parser->header_state = h_state;
1700
1701
3.19k
        if (p == data + len)
1702
600
          --p;
1703
1704
3.19k
        COUNT_HEADER_SIZE(p - start);
1705
3.15k
        break;
1706
3.19k
      }
1707
1708
93.9k
      case s_header_almost_done:
1709
93.9k
      {
1710
93.9k
        if (UNLIKELY(ch != LF)) {
1711
11
          SET_ERRNO(HPE_LF_EXPECTED);
1712
11
          goto error;
1713
11
        }
1714
1715
93.9k
        UPDATE_STATE(s_header_value_lws);
1716
93.9k
        break;
1717
93.9k
      }
1718
1719
93.8k
      case s_header_value_lws:
1720
93.8k
      {
1721
93.8k
        if (ch == ' ' || ch == '\t') {
1722
47.6k
          if (parser->header_state == h_content_length_num) {
1723
              /* treat obsolete line folding as space */
1724
201
              parser->header_state = h_content_length_ws;
1725
201
          }
1726
47.6k
          UPDATE_STATE(s_header_value_start);
1727
47.6k
          REEXECUTE();
1728
0
        }
1729
1730
        /* finished the header */
1731
46.2k
        switch (parser->header_state) {
1732
363
          case h_connection_keep_alive:
1733
363
            parser->flags |= F_CONNECTION_KEEP_ALIVE;
1734
363
            break;
1735
471
          case h_connection_close:
1736
471
            parser->flags |= F_CONNECTION_CLOSE;
1737
471
            break;
1738
1.37k
          case h_transfer_encoding_chunked:
1739
1.37k
            parser->flags |= F_CHUNKED;
1740
1.37k
            break;
1741
1.41k
          case h_connection_upgrade:
1742
1.41k
            parser->flags |= F_CONNECTION_UPGRADE;
1743
1.41k
            break;
1744
42.5k
          default:
1745
42.5k
            break;
1746
46.2k
        }
1747
1748
46.2k
        UPDATE_STATE(s_header_field_start);
1749
46.2k
        REEXECUTE();
1750
0
      }
1751
1752
1.26k
      case s_header_value_discard_ws_almost_done:
1753
1.26k
      {
1754
1.26k
        STRICT_CHECK(ch != LF);
1755
1.26k
        UPDATE_STATE(s_header_value_discard_lws);
1756
1.26k
        break;
1757
0
      }
1758
1759
79.2k
      case s_header_value_discard_lws:
1760
79.2k
      {
1761
79.2k
        if (ch == ' ' || ch == '\t') {
1762
1.90k
          UPDATE_STATE(s_header_value_discard_ws);
1763
1.90k
          break;
1764
77.3k
        } else {
1765
77.3k
          switch (parser->header_state) {
1766
0
            case h_connection_keep_alive:
1767
0
              parser->flags |= F_CONNECTION_KEEP_ALIVE;
1768
0
              break;
1769
0
            case h_connection_close:
1770
0
              parser->flags |= F_CONNECTION_CLOSE;
1771
0
              break;
1772
0
            case h_connection_upgrade:
1773
0
              parser->flags |= F_CONNECTION_UPGRADE;
1774
0
              break;
1775
0
            case h_transfer_encoding_chunked:
1776
0
              parser->flags |= F_CHUNKED;
1777
0
              break;
1778
1
            case h_content_length:
1779
              /* do not allow empty content length */
1780
1
              SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1781
1
              goto error;
1782
0
              break;
1783
77.3k
            default:
1784
77.3k
              break;
1785
77.3k
          }
1786
1787
          /* header value was empty */
1788
77.3k
          MARK(header_value);
1789
77.3k
          UPDATE_STATE(s_header_field_start);
1790
77.3k
          CALLBACK_DATA_NOADVANCE(header_value);
1791
77.3k
          REEXECUTE();
1792
0
        }
1793
79.2k
      }
1794
1795
25.7k
      case s_headers_almost_done:
1796
25.7k
      {
1797
25.7k
        STRICT_CHECK(ch != LF);
1798
1799
25.7k
        if (parser->flags & F_TRAILING) {
1800
          /* End of a chunked request */
1801
576
          UPDATE_STATE(s_message_done);
1802
576
          CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1803
576
          REEXECUTE();
1804
0
        }
1805
1806
        /* Cannot use transfer-encoding and a content-length header together
1807
           per the HTTP specification. (RFC 7230 Section 3.3.3) */
1808
25.1k
        if ((parser->uses_transfer_encoding == 1) &&
1809
1.10k
            (parser->flags & F_CONTENTLENGTH)) {
1810
          /* Allow it for lenient parsing as long as `Transfer-Encoding` is
1811
           * not `chunked` or allow_length_with_encoding is set
1812
           */
1813
2
          if (parser->flags & F_CHUNKED) {
1814
1
            if (!allow_chunked_length) {
1815
1
              SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1816
1
              goto error;
1817
1
            }
1818
1
          } else if (!lenient) {
1819
1
            SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1820
1
            goto error;
1821
1
          }
1822
2
        }
1823
1824
25.1k
        UPDATE_STATE(s_headers_done);
1825
1826
        /* Set this here so that on_headers_complete() callbacks can see it */
1827
25.1k
        if ((parser->flags & F_UPGRADE) &&
1828
472
            (parser->flags & F_CONNECTION_UPGRADE)) {
1829
          /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1830
           * mandatory only when it is a 101 Switching Protocols response,
1831
           * otherwise it is purely informational, to announce support.
1832
           */
1833
278
          parser->upgrade =
1834
278
              (parser->type == HTTP_REQUEST || parser->status_code == 101);
1835
24.9k
        } else {
1836
24.9k
          parser->upgrade = (parser->method == HTTP_CONNECT);
1837
24.9k
        }
1838
1839
        /* Here we call the headers_complete callback. This is somewhat
1840
         * different than other callbacks because if the user returns 1, we
1841
         * will interpret that as saying that this message has no body. This
1842
         * is needed for the annoying case of receiving a response to a HEAD
1843
         * request.
1844
         *
1845
         * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1846
         * we have to simulate it by handling a change in errno below.
1847
         */
1848
25.1k
        if (settings->on_headers_complete) {
1849
0
          switch (settings->on_headers_complete(parser)) {
1850
0
            case 0:
1851
0
              break;
1852
1853
0
            case 2:
1854
0
              parser->upgrade = 1;
1855
1856
              /* fall through */
1857
0
            case 1:
1858
0
              parser->flags |= F_SKIPBODY;
1859
0
              break;
1860
1861
0
            default:
1862
0
              SET_ERRNO(HPE_CB_headers_complete);
1863
0
              RETURN(p - data); /* Error */
1864
0
          }
1865
0
        }
1866
1867
25.1k
        if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1868
0
          RETURN(p - data);
1869
0
        }
1870
1871
25.1k
        REEXECUTE();
1872
0
      }
1873
1874
25.1k
      case s_headers_done:
1875
25.1k
      {
1876
25.1k
        int hasBody;
1877
25.1k
        STRICT_CHECK(ch != LF);
1878
1879
25.1k
        parser->nread = 0;
1880
25.1k
        nread = 0;
1881
1882
25.1k
        hasBody = parser->flags & F_CHUNKED ||
1883
24.0k
          (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1884
25.1k
        if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1885
7
                                (parser->flags & F_SKIPBODY) || !hasBody)) {
1886
          /* Exit, the rest of the message is in a different protocol. */
1887
6
          UPDATE_STATE(NEW_MESSAGE());
1888
6
          CALLBACK_NOTIFY(message_complete);
1889
6
          RETURN((p - data) + 1);
1890
0
        }
1891
1892
25.1k
        if (parser->flags & F_SKIPBODY) {
1893
0
          UPDATE_STATE(NEW_MESSAGE());
1894
0
          CALLBACK_NOTIFY(message_complete);
1895
25.1k
        } else if (parser->flags & F_CHUNKED) {
1896
          /* chunked encoding - ignore Content-Length header,
1897
           * prepare for a chunk */
1898
1.09k
          UPDATE_STATE(s_chunk_size_start);
1899
24.0k
        } else if (parser->uses_transfer_encoding == 1) {
1900
10
          if (parser->type == HTTP_REQUEST && !lenient) {
1901
            /* RFC 7230 3.3.3 */
1902
1903
            /* If a Transfer-Encoding header field
1904
             * is present in a request and the chunked transfer coding is not
1905
             * the final encoding, the message body length cannot be determined
1906
             * reliably; the server MUST respond with the 400 (Bad Request)
1907
             * status code and then close the connection.
1908
             */
1909
9
            SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
1910
9
            RETURN(p - data); /* Error */
1911
1
          } else {
1912
            /* RFC 7230 3.3.3 */
1913
1914
            /* If a Transfer-Encoding header field is present in a response and
1915
             * the chunked transfer coding is not the final encoding, the
1916
             * message body length is determined by reading the connection until
1917
             * it is closed by the server.
1918
             */
1919
1
            UPDATE_STATE(s_body_identity_eof);
1920
1
          }
1921
24.0k
        } else {
1922
24.0k
          if (parser->content_length == 0) {
1923
            /* Content-Length header given but zero: Content-Length: 0\r\n */
1924
619
            UPDATE_STATE(NEW_MESSAGE());
1925
619
            CALLBACK_NOTIFY(message_complete);
1926
23.4k
          } else if (parser->content_length != ULLONG_MAX) {
1927
            /* Content-Length header given and non-zero */
1928
1.38k
            UPDATE_STATE(s_body_identity);
1929
22.0k
          } else {
1930
22.0k
            if (!http_message_needs_eof(parser)) {
1931
              /* Assume content-length 0 - read the next */
1932
22.0k
              UPDATE_STATE(NEW_MESSAGE());
1933
22.0k
              CALLBACK_NOTIFY(message_complete);
1934
22.0k
            } else {
1935
              /* Read body until EOF */
1936
52
              UPDATE_STATE(s_body_identity_eof);
1937
52
            }
1938
22.0k
          }
1939
24.0k
        }
1940
1941
25.1k
        break;
1942
25.1k
      }
1943
1944
25.1k
      case s_body_identity:
1945
1.27k
      {
1946
1.27k
        uint64_t to_read = MIN(parser->content_length,
1947
1.27k
                               (uint64_t) ((data + len) - p));
1948
1949
1.27k
        assert(parser->content_length != 0
1950
1.27k
            && parser->content_length != ULLONG_MAX);
1951
1952
        /* The difference between advancing content_length and p is because
1953
         * the latter will automatically advance on the next loop iteration.
1954
         * Further, if content_length ends up at 0, we want to see the last
1955
         * byte again for our message complete callback.
1956
         */
1957
1.27k
        MARK(body);
1958
1.27k
        parser->content_length -= to_read;
1959
1.27k
        p += to_read - 1;
1960
1961
1.27k
        if (parser->content_length == 0) {
1962
1.06k
          UPDATE_STATE(s_message_done);
1963
1964
          /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1965
           *
1966
           * The alternative to doing this is to wait for the next byte to
1967
           * trigger the data callback, just as in every other case. The
1968
           * problem with this is that this makes it difficult for the test
1969
           * harness to distinguish between complete-on-EOF and
1970
           * complete-on-length. It's not clear that this distinction is
1971
           * important for applications, but let's keep it for now.
1972
           */
1973
1.06k
          CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1974
1.06k
          REEXECUTE();
1975
0
        }
1976
1977
204
        break;
1978
1.27k
      }
1979
1980
      /* read until EOF */
1981
204
      case s_body_identity_eof:
1982
2
        MARK(body);
1983
2
        p = data + len - 1;
1984
1985
2
        break;
1986
1987
1.64k
      case s_message_done:
1988
1.64k
        UPDATE_STATE(NEW_MESSAGE());
1989
1.64k
        CALLBACK_NOTIFY(message_complete);
1990
1.64k
        if (parser->upgrade) {
1991
          /* Exit, the rest of the message is in a different protocol. */
1992
1
          RETURN((p - data) + 1);
1993
0
        }
1994
1.64k
        break;
1995
1996
2.03k
      case s_chunk_size_start:
1997
2.03k
      {
1998
2.03k
        assert(nread == 1);
1999
2.03k
        assert(parser->flags & F_CHUNKED);
2000
2001
2.03k
        unhex_val = unhex[(unsigned char)ch];
2002
2.03k
        if (UNLIKELY(unhex_val == -1)) {
2003
1
          SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
2004
1
          goto error;
2005
1
        }
2006
2007
2.03k
        parser->content_length = unhex_val;
2008
2.03k
        UPDATE_STATE(s_chunk_size);
2009
2.03k
        break;
2010
2.03k
      }
2011
2012
6.46k
      case s_chunk_size:
2013
6.46k
      {
2014
6.46k
        uint64_t t;
2015
2016
6.46k
        assert(parser->flags & F_CHUNKED);
2017
2018
6.46k
        if (ch == CR) {
2019
1.13k
          UPDATE_STATE(s_chunk_size_almost_done);
2020
1.13k
          break;
2021
1.13k
        }
2022
2023
5.33k
        unhex_val = unhex[(unsigned char)ch];
2024
2025
5.33k
        if (unhex_val == -1) {
2026
772
          if (ch == ';' || ch == ' ') {
2027
761
            UPDATE_STATE(s_chunk_parameters);
2028
761
            break;
2029
761
          }
2030
2031
11
          SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
2032
11
          goto error;
2033
772
        }
2034
2035
4.55k
        t = parser->content_length;
2036
4.55k
        t *= 16;
2037
4.55k
        t += unhex_val;
2038
2039
        /* Overflow? Test against a conservative limit for simplicity. */
2040
4.55k
        if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
2041
7
          SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
2042
7
          goto error;
2043
7
        }
2044
2045
4.55k
        parser->content_length = t;
2046
4.55k
        break;
2047
4.55k
      }
2048
2049
75.5k
      case s_chunk_parameters:
2050
75.5k
      {
2051
75.5k
        assert(parser->flags & F_CHUNKED);
2052
        /* just ignore this shit. TODO check for overflow */
2053
75.5k
        if (ch == CR) {
2054
735
          UPDATE_STATE(s_chunk_size_almost_done);
2055
735
          break;
2056
735
        }
2057
74.7k
        break;
2058
75.5k
      }
2059
2060
74.7k
      case s_chunk_size_almost_done:
2061
1.85k
      {
2062
1.85k
        assert(parser->flags & F_CHUNKED);
2063
1.85k
        STRICT_CHECK(ch != LF);
2064
2065
1.85k
        parser->nread = 0;
2066
1.85k
        nread = 0;
2067
2068
1.85k
        if (parser->content_length == 0) {
2069
584
          parser->flags |= F_TRAILING;
2070
584
          UPDATE_STATE(s_header_field_start);
2071
1.27k
        } else {
2072
1.27k
          UPDATE_STATE(s_chunk_data);
2073
1.27k
        }
2074
1.85k
        CALLBACK_NOTIFY(chunk_header);
2075
1.85k
        break;
2076
1.85k
      }
2077
2078
1.85k
      case s_chunk_data:
2079
1.16k
      {
2080
1.16k
        uint64_t to_read = MIN(parser->content_length,
2081
1.16k
                               (uint64_t) ((data + len) - p));
2082
2083
1.16k
        assert(parser->flags & F_CHUNKED);
2084
1.16k
        assert(parser->content_length != 0
2085
1.16k
            && parser->content_length != ULLONG_MAX);
2086
2087
        /* See the explanation in s_body_identity for why the content
2088
         * length and data pointers are managed this way.
2089
         */
2090
1.16k
        MARK(body);
2091
1.16k
        parser->content_length -= to_read;
2092
1.16k
        p += to_read - 1;
2093
2094
1.16k
        if (parser->content_length == 0) {
2095
973
          UPDATE_STATE(s_chunk_data_almost_done);
2096
973
        }
2097
2098
1.16k
        break;
2099
1.16k
      }
2100
2101
965
      case s_chunk_data_almost_done:
2102
965
        assert(parser->flags & F_CHUNKED);
2103
965
        assert(parser->content_length == 0);
2104
965
        STRICT_CHECK(ch != CR);
2105
965
        UPDATE_STATE(s_chunk_data_done);
2106
965
        CALLBACK_DATA(body);
2107
965
        break;
2108
2109
965
      case s_chunk_data_done:
2110
956
        assert(parser->flags & F_CHUNKED);
2111
956
        STRICT_CHECK(ch != LF);
2112
956
        parser->nread = 0;
2113
956
        nread = 0;
2114
956
        UPDATE_STATE(s_chunk_size_start);
2115
956
        CALLBACK_NOTIFY(chunk_complete);
2116
956
        break;
2117
2118
956
      default:
2119
0
        assert(0 && "unhandled state");
2120
0
        SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2121
0
        goto error;
2122
1.30M
    }
2123
1.30M
  }
2124
2125
  /* Run callbacks for any marks that we have leftover after we ran out of
2126
   * bytes. There should be at most one of these set, so it's OK to invoke
2127
   * them in series (unset marks will not result in callbacks).
2128
   *
2129
   * We use the NOADVANCE() variety of callbacks here because 'p' has already
2130
   * overflowed 'data' and this allows us to correct for the off-by-one that
2131
   * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2132
   * value that's in-bounds).
2133
   */
2134
2135
4.26k
  assert(((header_field_mark ? 1 : 0) +
2136
3.20k
          (header_value_mark ? 1 : 0) +
2137
3.20k
          (url_mark ? 1 : 0)  +
2138
3.20k
          (body_mark ? 1 : 0) +
2139
3.20k
          (status_mark ? 1 : 0)) <= 1);
2140
2141
3.20k
  CALLBACK_DATA_NOADVANCE(header_field);
2142
3.20k
  CALLBACK_DATA_NOADVANCE(header_value);
2143
3.20k
  CALLBACK_DATA_NOADVANCE(url);
2144
3.20k
  CALLBACK_DATA_NOADVANCE(body);
2145
3.20k
  CALLBACK_DATA_NOADVANCE(status);
2146
2147
3.20k
  RETURN(len);
2148
2149
1.03k
error:
2150
1.03k
  if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2151
0
    SET_ERRNO(HPE_UNKNOWN);
2152
0
  }
2153
2154
1.03k
  RETURN(p - data);
2155
0
}
2156
2157
2158
/* Does the parser need to see an EOF to find the end of the message? */
2159
int
2160
http_message_needs_eof (const http_parser *parser)
2161
22.0k
{
2162
22.0k
  if (parser->type == HTTP_REQUEST) {
2163
20.1k
    return 0;
2164
20.1k
  }
2165
2166
  /* See RFC 2616 section 4.4 */
2167
1.91k
  if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2168
1.25k
      parser->status_code == 204 ||     /* No Content */
2169
489
      parser->status_code == 304 ||     /* Not Modified */
2170
1.86k
      parser->flags & F_SKIPBODY) {     /* response to a HEAD request */
2171
1.86k
    return 0;
2172
1.86k
  }
2173
2174
  /* RFC 7230 3.3.3, see `s_headers_almost_done` */
2175
52
  if ((parser->uses_transfer_encoding == 1) &&
2176
0
      (parser->flags & F_CHUNKED) == 0) {
2177
0
    return 1;
2178
0
  }
2179
2180
52
  if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2181
0
    return 0;
2182
0
  }
2183
2184
52
  return 1;
2185
52
}
2186
2187
2188
int
2189
http_should_keep_alive (const http_parser *parser)
2190
0
{
2191
0
  if (parser->http_major > 0 && parser->http_minor > 0) {
2192
    /* HTTP/1.1 */
2193
0
    if (parser->flags & F_CONNECTION_CLOSE) {
2194
0
      return 0;
2195
0
    }
2196
0
  } else {
2197
    /* HTTP/1.0 or earlier */
2198
0
    if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2199
0
      return 0;
2200
0
    }
2201
0
  }
2202
2203
0
  return !http_message_needs_eof(parser);
2204
0
}
2205
2206
2207
const char *
2208
http_method_str (enum http_method m)
2209
0
{
2210
0
  return ELEM_AT(method_strings, m, "<unknown>");
2211
0
}
2212
2213
const char *
2214
http_status_str (enum http_status s)
2215
0
{
2216
0
  switch (s) {
2217
0
#define XX(num, name, string) case HTTP_STATUS_##name: return #string;
2218
0
    HTTP_STATUS_MAP(XX)
2219
0
#undef XX
2220
0
    default: return "<unknown>";
2221
0
  }
2222
0
}
2223
2224
void
2225
http_parser_init (http_parser *parser, enum http_parser_type t)
2226
4.26k
{
2227
4.26k
  void *data = parser->data; /* preserve application data */
2228
4.26k
  memset(parser, 0, sizeof(*parser));
2229
4.26k
  parser->data = data;
2230
4.26k
  parser->type = t;
2231
4.26k
  parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2232
4.26k
  parser->http_errno = HPE_OK;
2233
4.26k
}
2234
2235
void
2236
http_parser_settings_init(http_parser_settings *settings)
2237
0
{
2238
0
  memset(settings, 0, sizeof(*settings));
2239
0
}
2240
2241
const char *
2242
0
http_errno_name(enum http_errno err) {
2243
0
  assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2244
0
  return http_strerror_tab[err].name;
2245
0
}
2246
2247
const char *
2248
0
http_errno_description(enum http_errno err) {
2249
0
  assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2250
0
  return http_strerror_tab[err].description;
2251
0
}
2252
2253
static enum http_host_state
2254
0
http_parse_host_char(enum http_host_state s, const char ch) {
2255
0
  switch(s) {
2256
0
    case s_http_userinfo:
2257
0
    case s_http_userinfo_start:
2258
0
      if (ch == '@') {
2259
0
        return s_http_host_start;
2260
0
      }
2261
2262
0
      if (IS_USERINFO_CHAR(ch)) {
2263
0
        return s_http_userinfo;
2264
0
      }
2265
0
      break;
2266
2267
0
    case s_http_host_start:
2268
0
      if (ch == '[') {
2269
0
        return s_http_host_v6_start;
2270
0
      }
2271
2272
0
      if (IS_HOST_CHAR(ch)) {
2273
0
        return s_http_host;
2274
0
      }
2275
2276
0
      break;
2277
2278
0
    case s_http_host:
2279
0
      if (IS_HOST_CHAR(ch)) {
2280
0
        return s_http_host;
2281
0
      }
2282
2283
    /* fall through */
2284
0
    case s_http_host_v6_end:
2285
0
      if (ch == ':') {
2286
0
        return s_http_host_port_start;
2287
0
      }
2288
2289
0
      break;
2290
2291
0
    case s_http_host_v6:
2292
0
      if (ch == ']') {
2293
0
        return s_http_host_v6_end;
2294
0
      }
2295
2296
    /* fall through */
2297
0
    case s_http_host_v6_start:
2298
0
      if (IS_HEX(ch) || ch == ':' || ch == '.') {
2299
0
        return s_http_host_v6;
2300
0
      }
2301
2302
0
      if (s == s_http_host_v6 && ch == '%') {
2303
0
        return s_http_host_v6_zone_start;
2304
0
      }
2305
0
      break;
2306
2307
0
    case s_http_host_v6_zone:
2308
0
      if (ch == ']') {
2309
0
        return s_http_host_v6_end;
2310
0
      }
2311
2312
    /* fall through */
2313
0
    case s_http_host_v6_zone_start:
2314
      /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2315
0
      if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2316
0
          ch == '~') {
2317
0
        return s_http_host_v6_zone;
2318
0
      }
2319
0
      break;
2320
2321
0
    case s_http_host_port:
2322
0
    case s_http_host_port_start:
2323
0
      if (IS_NUM(ch)) {
2324
0
        return s_http_host_port;
2325
0
      }
2326
2327
0
      break;
2328
2329
0
    default:
2330
0
      break;
2331
0
  }
2332
0
  return s_http_host_dead;
2333
0
}
2334
2335
static int
2336
0
http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2337
0
  enum http_host_state s;
2338
2339
0
  const char *p;
2340
0
  size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2341
2342
0
  assert(u->field_set & (1 << UF_HOST));
2343
2344
0
  u->field_data[UF_HOST].len = 0;
2345
2346
0
  s = found_at ? s_http_userinfo_start : s_http_host_start;
2347
2348
0
  for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2349
0
    enum http_host_state new_s = http_parse_host_char(s, *p);
2350
2351
0
    if (new_s == s_http_host_dead) {
2352
0
      return 1;
2353
0
    }
2354
2355
0
    switch(new_s) {
2356
0
      case s_http_host:
2357
0
        if (s != s_http_host) {
2358
0
          u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2359
0
        }
2360
0
        u->field_data[UF_HOST].len++;
2361
0
        break;
2362
2363
0
      case s_http_host_v6:
2364
0
        if (s != s_http_host_v6) {
2365
0
          u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2366
0
        }
2367
0
        u->field_data[UF_HOST].len++;
2368
0
        break;
2369
2370
0
      case s_http_host_v6_zone_start:
2371
0
      case s_http_host_v6_zone:
2372
0
        u->field_data[UF_HOST].len++;
2373
0
        break;
2374
2375
0
      case s_http_host_port:
2376
0
        if (s != s_http_host_port) {
2377
0
          u->field_data[UF_PORT].off = (uint16_t)(p - buf);
2378
0
          u->field_data[UF_PORT].len = 0;
2379
0
          u->field_set |= (1 << UF_PORT);
2380
0
        }
2381
0
        u->field_data[UF_PORT].len++;
2382
0
        break;
2383
2384
0
      case s_http_userinfo:
2385
0
        if (s != s_http_userinfo) {
2386
0
          u->field_data[UF_USERINFO].off = (uint16_t)(p - buf);
2387
0
          u->field_data[UF_USERINFO].len = 0;
2388
0
          u->field_set |= (1 << UF_USERINFO);
2389
0
        }
2390
0
        u->field_data[UF_USERINFO].len++;
2391
0
        break;
2392
2393
0
      default:
2394
0
        break;
2395
0
    }
2396
0
    s = new_s;
2397
0
  }
2398
2399
  /* Make sure we don't end somewhere unexpected */
2400
0
  switch (s) {
2401
0
    case s_http_host_start:
2402
0
    case s_http_host_v6_start:
2403
0
    case s_http_host_v6:
2404
0
    case s_http_host_v6_zone_start:
2405
0
    case s_http_host_v6_zone:
2406
0
    case s_http_host_port_start:
2407
0
    case s_http_userinfo:
2408
0
    case s_http_userinfo_start:
2409
0
      return 1;
2410
0
    default:
2411
0
      break;
2412
0
  }
2413
2414
0
  return 0;
2415
0
}
2416
2417
void
2418
0
http_parser_url_init(struct http_parser_url *u) {
2419
0
  memset(u, 0, sizeof(*u));
2420
0
}
2421
2422
int
2423
http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2424
                      struct http_parser_url *u)
2425
0
{
2426
0
  enum state s;
2427
0
  const char *p;
2428
0
  enum http_parser_url_fields uf, old_uf;
2429
0
  int found_at = 0;
2430
2431
0
  if (buflen == 0) {
2432
0
    return 1;
2433
0
  }
2434
2435
0
  u->port = u->field_set = 0;
2436
0
  s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2437
0
  old_uf = UF_MAX;
2438
2439
0
  for (p = buf; p < buf + buflen; p++) {
2440
0
    s = parse_url_char(s, *p);
2441
2442
    /* Figure out the next field that we're operating on */
2443
0
    switch (s) {
2444
0
      case s_dead:
2445
0
        return 1;
2446
2447
      /* Skip delimeters */
2448
0
      case s_req_schema_slash:
2449
0
      case s_req_schema_slash_slash:
2450
0
      case s_req_server_start:
2451
0
      case s_req_query_string_start:
2452
0
      case s_req_fragment_start:
2453
0
        continue;
2454
2455
0
      case s_req_schema:
2456
0
        uf = UF_SCHEMA;
2457
0
        break;
2458
2459
0
      case s_req_server_with_at:
2460
0
        found_at = 1;
2461
2462
      /* fall through */
2463
0
      case s_req_server:
2464
0
        uf = UF_HOST;
2465
0
        break;
2466
2467
0
      case s_req_path:
2468
0
        uf = UF_PATH;
2469
0
        break;
2470
2471
0
      case s_req_query_string:
2472
0
        uf = UF_QUERY;
2473
0
        break;
2474
2475
0
      case s_req_fragment:
2476
0
        uf = UF_FRAGMENT;
2477
0
        break;
2478
2479
0
      default:
2480
0
        assert(!"Unexpected state");
2481
0
        return 1;
2482
0
    }
2483
2484
    /* Nothing's changed; soldier on */
2485
0
    if (uf == old_uf) {
2486
0
      u->field_data[uf].len++;
2487
0
      continue;
2488
0
    }
2489
2490
0
    u->field_data[uf].off = (uint16_t)(p - buf);
2491
0
    u->field_data[uf].len = 1;
2492
2493
0
    u->field_set |= (1 << uf);
2494
0
    old_uf = uf;
2495
0
  }
2496
2497
  /* host must be present if there is a schema */
2498
  /* parsing http:///toto will fail */
2499
0
  if ((u->field_set & (1 << UF_SCHEMA)) &&
2500
0
      (u->field_set & (1 << UF_HOST)) == 0) {
2501
0
    return 1;
2502
0
  }
2503
2504
0
  if (u->field_set & (1 << UF_HOST)) {
2505
0
    if (http_parse_host(buf, u, found_at) != 0) {
2506
0
      return 1;
2507
0
    }
2508
0
  }
2509
2510
  /* CONNECT requests can only contain "hostname:port" */
2511
0
  if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2512
0
    return 1;
2513
0
  }
2514
2515
0
  if (u->field_set & (1 << UF_PORT)) {
2516
0
    uint16_t off;
2517
0
    uint16_t len;
2518
0
    const char* p;
2519
0
    const char* end;
2520
0
    unsigned long v;
2521
2522
0
    off = u->field_data[UF_PORT].off;
2523
0
    len = u->field_data[UF_PORT].len;
2524
0
    end = buf + off + len;
2525
2526
    /* NOTE: The characters are already validated and are in the [0-9] range */
2527
0
    assert((size_t) (off + len) <= buflen && "Port number overflow");
2528
0
    v = 0;
2529
0
    for (p = buf + off; p < end; p++) {
2530
0
      v *= 10;
2531
0
      v += *p - '0';
2532
2533
      /* Ports have a max value of 2^16 */
2534
0
      if (v > 0xffff) {
2535
0
        return 1;
2536
0
      }
2537
0
    }
2538
2539
0
    u->port = (uint16_t) v;
2540
0
  }
2541
2542
0
  return 0;
2543
0
}
2544
2545
void
2546
0
http_parser_pause(http_parser *parser, int paused) {
2547
  /* Users should only be pausing/unpausing a parser that is not in an error
2548
   * state. In non-debug builds, there's not much that we can do about this
2549
   * other than ignore it.
2550
   */
2551
0
  if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2552
0
      HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2553
0
    uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
2554
0
    SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2555
0
  } else {
2556
0
    assert(0 && "Attempting to pause parser in error state");
2557
0
  }
2558
0
}
2559
2560
int
2561
0
http_body_is_final(const struct http_parser *parser) {
2562
0
    return parser->state == s_message_done;
2563
0
}
2564
2565
unsigned long
2566
0
http_parser_version(void) {
2567
0
  return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2568
0
         HTTP_PARSER_VERSION_MINOR * 0x00100 |
2569
0
         HTTP_PARSER_VERSION_PATCH * 0x00001;
2570
0
}
2571
2572
void
2573
0
http_parser_set_max_header_size(uint32_t size) {
2574
0
  max_header_size = size;
2575
0
}