Coverage Report

Created: 2023-03-26 06:05

/src/tarantool/src/lib/http_parser/http_parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file.
3
 *
4
 * Redistribution and use in source and binary forms, with or
5
 * without modification, are permitted provided that the following
6
 * conditions are met:
7
 *
8
 * 1. Redistributions of source code must retain the above
9
 *    copyright notice, this list of conditions and the
10
 *    following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above
13
 *    copyright notice, this list of conditions and the following
14
 *    disclaimer in the documentation and/or other materials
15
 *    provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
21
 * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
22
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
28
 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29
 * SUCH DAMAGE.
30
 */
31
#include "http_parser.h"
32
#include <string.h>
33
34
1.07M
/*
 * Line terminator bytes and the terminator sequence. The cast
 * expressions are fully parenthesized so that the macros behave as
 * single operands wherever they are expanded (e.g. under sizeof).
 */
#define LF     ((unsigned char) '\n')
#define CR     ((unsigned char) '\r')
#define CRLF   "\r\n"
37
38
/**
39
 * The following HTTP parser functions were taken with slight
40
 * adaptation from nginx http parser module
41
 */
42
43
void http_parser_create(struct http_parser *parser)
44
429
{
45
429
 parser->hdr_value_start = NULL;
46
429
 parser->hdr_value_end = NULL;
47
429
 parser->http_major = -1;
48
429
 parser->http_minor = -1;
49
429
 parser->hdr_name = NULL;
50
429
 parser->hdr_name_idx = 0;
51
429
}
52
53
/**
54
 * Utility function used in headers parsing
55
 */
56
static int
57
http_parse_status_line(struct http_parser *parser, const char **bufp,
58
           const char *end_buf)
59
221
{
60
221
  char ch;
61
221
  const char *p = *bufp;
62
221
  enum {
63
221
    sw_start = 0,
64
221
    sw_H,
65
221
    sw_HT,
66
221
    sw_HTT,
67
221
    sw_HTTP,
68
221
    sw_first_major_digit,
69
221
    sw_major_digit,
70
221
    sw_first_minor_digit,
71
221
    sw_minor_digit,
72
221
    sw_status,
73
221
    sw_space_after_status,
74
221
    sw_status_text,
75
221
    sw_almost_done
76
221
  } state;
77
78
221
  state = sw_start;
79
221
  int status_count = 0;
80
3.51k
  for (;p < end_buf; p++) {
81
3.43k
    ch = *p;
82
3.43k
    switch (state) {
83
    /* "HTTP/" */
84
221
    case sw_start:
85
221
      if (ch == 'H')
86
221
        state = sw_H;
87
0
      else
88
0
        return HTTP_PARSE_INVALID;
89
221
      break;
90
221
    case sw_H:
91
221
      if (ch == 'T')
92
221
        state = sw_HT;
93
0
      else
94
0
        return HTTP_PARSE_INVALID;
95
221
      break;
96
221
    case sw_HT:
97
221
      if (ch == 'T')
98
221
        state = sw_HTT;
99
0
      else
100
0
        return HTTP_PARSE_INVALID;
101
221
      break;
102
221
    case sw_HTT:
103
221
      if (ch == 'P')
104
221
        state = sw_HTTP;
105
0
      else
106
0
        return HTTP_PARSE_INVALID;
107
221
      break;
108
221
    case sw_HTTP:
109
221
      if (ch == '/')
110
221
        state = sw_first_major_digit;
111
0
      else
112
0
        return HTTP_PARSE_INVALID;
113
221
      break;
114
    /* The first digit of major HTTP version */
115
221
    case sw_first_major_digit:
116
220
      if (ch < '1' || ch > '9') {
117
22
        return HTTP_PARSE_INVALID;
118
22
      }
119
198
      parser->http_major = ch - '0';
120
198
      state = sw_major_digit;
121
198
      break;
122
    /* The major HTTP version or dot */
123
208
    case sw_major_digit:
124
208
      if (ch == '.') {
125
76
        state = sw_first_minor_digit;
126
76
        break;
127
76
      }
128
132
      if (ch == ' ') {
129
67
        parser->http_minor = 0;
130
67
        state = sw_status;
131
67
        break;
132
67
      }
133
65
      if (ch < '0' || ch > '9') {
134
30
        return HTTP_PARSE_INVALID;
135
30
      }
136
35
      if (parser->http_major > 99) {
137
4
        return HTTP_PARSE_INVALID;
138
4
      }
139
31
      parser->http_major = parser->http_major * 10
140
31
               + (ch - '0');
141
31
      break;
142
    /* The first digit of minor HTTP version */
143
75
    case sw_first_minor_digit:
144
75
      if (ch < '0' || ch > '9') {
145
21
        return HTTP_PARSE_INVALID;
146
21
      }
147
54
      parser->http_minor = ch - '0';
148
54
      state = sw_minor_digit;
149
54
      break;
150
    /*
151
     * The minor HTTP version or
152
     * the end of the request line
153
     */
154
258
    case sw_minor_digit:
155
258
      if (ch == ' ') {
156
1
        state = sw_status;
157
1
        break;
158
1
      }
159
257
      if (ch < '0' || ch > '9') {
160
27
        return HTTP_PARSE_INVALID;
161
27
      }
162
230
      if (parser->http_minor > 99) {
163
3
        return HTTP_PARSE_INVALID;
164
3
      }
165
227
      parser->http_minor = parser->http_minor * 10
166
227
               + (ch - '0');
167
227
      break;
168
    /* HTTP status code */
169
448
    case sw_status:
170
448
      if (ch == ' ') {
171
343
        break;
172
343
      }
173
105
      if (ch < '0' || ch > '9') {
174
26
        return HTTP_PARSE_INVALID;
175
26
      }
176
79
      if (++status_count == 3) {
177
24
        state = sw_space_after_status;
178
24
      }
179
79
      break;
180
    /* Space or end of line */
181
23
    case sw_space_after_status:
182
23
      switch (ch) {
183
15
      case ' ':
184
15
        state = sw_status_text;
185
15
        break;
186
3
      case '.':
187
        /* IIS may send 403.1, 403.2, etc */
188
3
        state = sw_status_text;
189
3
        break;
190
3
      case CR:
191
3
        state = sw_almost_done;
192
3
        break;
193
1
      case LF:
194
1
        goto done;
195
1
      default:
196
1
        return HTTP_PARSE_INVALID;
197
23
      }
198
21
      break;
199
    /* Any text until end of line */
200
1.09k
    case sw_status_text:
201
1.09k
      switch (ch) {
202
1
      case CR:
203
1
        state = sw_almost_done;
204
1
        break;
205
1
      case LF:
206
1
        goto done;
207
1.09k
      }
208
1.09k
      break;
209
210
    /* End of status line */
211
1.09k
    case sw_almost_done:
212
2
      switch (ch) {
213
1
      case LF:
214
1
        goto done;
215
1
      default:
216
1
        return HTTP_PARSE_INVALID;
217
2
      }
218
3.43k
    }
219
3.43k
  }
220
86
done:
221
86
  *bufp = p + 1;
222
86
  return HTTP_PARSE_OK;
223
221
}
224
225
int
226
http_parse_header_line(struct http_parser *prsr, const char **bufp,
227
           const char *end_buf, int max_hname_len)
228
429
{
229
429
  char c;
230
429
  unsigned char ch;
231
429
  const char *p = *bufp;
232
429
  const char *header_name_start = p;
233
429
  prsr->hdr_name_idx = 0;
234
235
429
  enum {
236
429
    sw_start = 0,
237
429
    skip_status_line,
238
429
    skipped_status_line_almost_done,
239
429
    sw_name,
240
429
    sw_space_before_value,
241
429
    sw_value,
242
429
    sw_space_after_value,
243
429
    sw_almost_done,
244
429
    sw_header_almost_done
245
429
  } state = sw_start;
246
247
  /*
248
   * The last '\0' is not needed
249
   * because string is zero terminated
250
   */
251
429
  static char lowcase[] =
252
429
      "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
253
429
      "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789"
254
429
      "\0\0\0\0\0\0\0abcdefghijklmnopqrstuvwxyz\0\0\0\0_\0"
255
429
      "abcdefghijklmnopqrstuvwxyz\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
256
429
      "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
257
429
      "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
258
429
      "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
259
429
      "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
260
429
      "\0\0\0\0\0\0\0\0\0\0";
261
262
5.24M
  for (; p < end_buf; p++) {
263
5.24M
    ch = *p;
264
5.24M
    switch (state) {
265
    /* first char */
266
564
    case sw_start:
267
564
      switch (ch) {
268
13
      case CR:
269
13
        prsr->hdr_value_end = p;
270
13
        state = sw_header_almost_done;
271
13
        break;
272
2
      case LF:
273
2
        prsr->hdr_value_end = p;
274
2
        goto header_done;
275
549
      default:
276
549
        state = sw_name;
277
549
        c = lowcase[ch];
278
549
        if (c != 0) {
279
441
          prsr->hdr_name[0] = c;
280
441
          prsr->hdr_name_idx = 1;
281
441
          break;
282
441
        }
283
108
        if (ch == '\0') {
284
1
          return HTTP_PARSE_INVALID;
285
1
        }
286
107
        break;
287
564
      }
288
561
      break;
289
1.94k
    case skip_status_line:
290
1.94k
      switch (ch) {
291
2
      case LF:
292
2
        goto skipped_status;
293
3
      case CR:
294
3
        state = skipped_status_line_almost_done;
295
1.93k
      default:
296
1.93k
        break;
297
1.94k
      }
298
1.93k
      break;
299
1.93k
    case skipped_status_line_almost_done:
300
1
      switch (ch) {
301
1
      case LF:
302
1
        goto skipped_status;
303
0
      case CR:
304
0
        break;
305
0
      default:
306
0
        return HTTP_PARSE_INVALID;
307
1
      }
308
0
      break;
309
    /* http_header name */
310
5.24M
    case sw_name:
311
5.24M
      c = lowcase[ch];
312
5.24M
      if (c != 0) {
313
4.16M
        if (prsr->hdr_name_idx < max_hname_len) {
314
4.16M
          prsr->hdr_name[prsr->hdr_name_idx] = c;
315
4.16M
          prsr->hdr_name_idx++;
316
4.16M
        }
317
4.16M
        break;
318
4.16M
      }
319
1.07M
      if (ch == ':') {
320
51
        state = sw_space_before_value;
321
51
        break;
322
51
      }
323
1.07M
      if (ch == CR) {
324
13
        prsr->hdr_value_start = p;
325
13
        prsr->hdr_value_end = p;
326
13
        state = sw_almost_done;
327
13
        break;
328
13
      }
329
1.07M
      if (ch == LF) {
330
1
        prsr->hdr_value_start = p;
331
1
        prsr->hdr_value_end = p;
332
1
        goto done;
333
1
      }
334
      /* handle "HTTP/1.1 ..." lines */
335
1.07M
      if (ch == '/' && p - header_name_start == 4 &&
336
1.07M
        strncmp(header_name_start, "HTTP", 4) == 0) {
337
221
        int rc = http_parse_status_line(prsr,
338
221
              &header_name_start,
339
221
              end_buf);
340
221
        if (rc == HTTP_PARSE_INVALID) {
341
135
          prsr->http_minor = -1;
342
135
          prsr->http_major = -1;
343
135
          state = sw_start;
344
135
        } else {
345
          /* Skip it till end of line. */
346
86
          state = skip_status_line;
347
86
        }
348
221
        break;
349
221
      }
350
1.07M
      if (ch == '\0')
351
5
        return HTTP_PARSE_INVALID;
352
1.07M
      break;
353
    /* space* before http_header value */
354
1.07M
    case sw_space_before_value:
355
236
      switch (ch) {
356
194
      case ' ':
357
194
        break;
358
1
      case CR:
359
1
        prsr->hdr_value_start = p;
360
1
        prsr->hdr_value_end = p;
361
1
        state = sw_almost_done;
362
1
        break;
363
1
      case LF:
364
1
        prsr->hdr_value_start = p;
365
1
        prsr->hdr_value_end = p;
366
1
        goto done;
367
1
      case '\0':
368
1
        return HTTP_PARSE_INVALID;
369
39
      default:
370
39
        prsr->hdr_value_start = p;
371
39
        state = sw_value;
372
39
        break;
373
236
      }
374
234
      break;
375
376
    /* http_header value */
377
596
    case sw_value:
378
596
      switch (ch) {
379
399
      case ' ':
380
399
        prsr->hdr_value_end = p;
381
399
        state = sw_space_after_value;
382
399
        break;
383
1
      case CR:
384
1
        prsr->hdr_value_end = p;
385
1
        state = sw_almost_done;
386
1
        break;
387
1
      case LF:
388
1
        prsr->hdr_value_end = p;
389
1
        goto done;
390
1
      case '\0':
391
1
        return HTTP_PARSE_INVALID;
392
596
      }
393
594
      break;
394
    /* space* before end of http_header line */
395
594
    case sw_space_after_value:
396
577
      switch (ch) {
397
194
      case ' ':
398
194
        break;
399
1
      case CR:
400
1
        state = sw_almost_done;
401
1
        break;
402
1
      case LF:
403
1
        goto done;
404
1
      case '\0':
405
1
        return HTTP_PARSE_INVALID;
406
380
      default:
407
380
        state = sw_value;
408
380
        break;
409
577
      }
410
575
      break;
411
    /* end of http_header line */
412
575
    case sw_almost_done:
413
323
      switch (ch) {
414
1
      case LF:
415
1
        goto done;
416
320
      case CR:
417
320
        break;
418
2
      default:
419
2
        return HTTP_PARSE_INVALID;
420
323
      }
421
320
      break;
422
    /* end of http_header */
423
320
    case sw_header_almost_done:
424
12
      if (ch == LF)
425
1
        goto header_done;
426
11
      else
427
11
        return HTTP_PARSE_INVALID;
428
5.24M
    }
429
5.24M
  }
430
431
399
skipped_status:
432
399
  *bufp = p + 1;
433
399
  return HTTP_PARSE_CONTINUE;
434
435
5
done:
436
5
  *bufp = p + 1;
437
5
  return HTTP_PARSE_OK;
438
439
3
header_done:
440
3
  *bufp = p + 1;
441
3
  return HTTP_PARSE_DONE;
442
429
}