Coverage Report

Created: 2024-09-11 06:26

/src/h2o/deps/picohttpparser/picohttpparser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
3
 *                         Shigeo Mitsunari
4
 *
5
 * The software is licensed under either the MIT License (below) or the Perl
6
 * license.
7
 *
8
 * Permission is hereby granted, free of charge, to any person obtaining a copy
9
 * of this software and associated documentation files (the "Software"), to
10
 * deal in the Software without restriction, including without limitation the
11
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
12
 * sell copies of the Software, and to permit persons to whom the Software is
13
 * furnished to do so, subject to the following conditions:
14
 *
15
 * The above copyright notice and this permission notice shall be included in
16
 * all copies or substantial portions of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24
 * IN THE SOFTWARE.
25
 */
26
27
#include <assert.h>
28
#include <stddef.h>
29
#include <string.h>
30
#ifdef __SSE4_2__
31
#ifdef _MSC_VER
32
#include <nmmintrin.h>
33
#else
34
#include <x86intrin.h>
35
#endif
36
#endif
37
#include "picohttpparser.h"
38
39
#if __GNUC__ >= 3
40
643k
#define likely(x) __builtin_expect(!!(x), 1)
41
1.32M
#define unlikely(x) __builtin_expect(!!(x), 0)
42
#else
43
#define likely(x) (x)
44
#define unlikely(x) (x)
45
#endif
46
47
#ifdef _MSC_VER
48
#define ALIGNED(n) _declspec(align(n))
49
#else
50
#define ALIGNED(n) __attribute__((aligned(n)))
51
#endif
52
53
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
54
55
#define CHECK_EOF()                                                                                                                \
56
1.90M
    if (buf == buf_end) {                                                                                                          \
57
1.81k
        *ret = -2;                                                                                                                 \
58
1.81k
        return NULL;                                                                                                               \
59
1.81k
    }
60
61
#define EXPECT_CHAR_NO_CHECK(ch)                                                                                                   \
62
125k
    if (*buf++ != ch) {                                                                                                            \
63
4.51k
        *ret = -1;                                                                                                                 \
64
4.51k
        return NULL;                                                                                                               \
65
4.51k
    }
66
67
#define EXPECT_CHAR(ch)                                                                                                            \
68
14.5k
    CHECK_EOF();                                                                                                                   \
69
14.4k
    EXPECT_CHAR_NO_CHECK(ch);
70
71
#define ADVANCE_TOKEN(tok, toklen)                                                                                                 \
72
14.5k
    do {                                                                                                                           \
73
14.5k
        const char *tok_start = buf;                                                                                               \
74
14.5k
        static const char ALIGNED(16) ranges2[16] = "\000\040\177\177";                                                            \
75
14.5k
        int found2;                                                                                                                \
76
14.5k
        buf = findchar_fast(buf, buf_end, ranges2, 4, &found2);                                                                    \
77
14.5k
        if (!found2) {                                                                                                             \
78
14.5k
            CHECK_EOF();                                                                                                           \
79
14.5k
        }                                                                                                                          \
80
743k
        while (1) {                                                                                                                \
81
743k
            if (*buf == ' ') {                                                                                                     \
82
14.2k
                break;                                                                                                             \
83
728k
            } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {                                                                      \
84
352k
                if ((unsigned char)*buf < '\040' || *buf == '\177') {                                                              \
85
133
                    *ret = -1;                                                                                                     \
86
133
                    return NULL;                                                                                                   \
87
133
                }                                                                                                                  \
88
352k
            }                                                                                                                      \
89
743k
            ++buf;                                                                                                                 \
90
728k
            CHECK_EOF();                                                                                                           \
91
728k
        }                                                                                                                          \
92
14.5k
        tok = tok_start;                                                                                                           \
93
14.2k
        toklen = buf - tok_start;                                                                                                  \
94
14.2k
    } while (0)
95
96
static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
97
                                    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
98
                                    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
99
                                    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
100
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
101
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
102
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
103
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
104
105
static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
106
78.3k
{
107
78.3k
    *found = 0;
108
#if __SSE4_2__
109
    if (likely(buf_end - buf >= 16)) {
110
        __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
111
112
        size_t left = (buf_end - buf) & ~15;
113
        do {
114
            __m128i b16 = _mm_loadu_si128((const __m128i *)buf);
115
            int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
116
            if (unlikely(r != 16)) {
117
                buf += r;
118
                *found = 1;
119
                break;
120
            }
121
            buf += 16;
122
            left -= 16;
123
        } while (likely(left != 0));
124
    }
125
#else
126
    /* suppress unused parameter warning */
127
78.3k
    (void)buf_end;
128
78.3k
    (void)ranges;
129
78.3k
    (void)ranges_size;
130
78.3k
#endif
131
78.3k
    return buf;
132
78.3k
}
133
134
static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
135
52.5k
{
136
52.5k
    const char *token_start = buf;
137
138
#ifdef __SSE4_2__
139
    static const char ALIGNED(16) ranges1[16] = "\0\010"    /* allow HT */
140
                                                "\012\037"  /* allow SP and up to but not including DEL */
141
                                                "\177\177"; /* allow chars w. MSB set */
142
    int found;
143
    buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
144
    if (found)
145
        goto FOUND_CTL;
146
#else
147
    /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
148
192k
    while (likely(buf_end - buf >= 8)) {
149
188k
#define DOIT()                                                                                                                     \
150
464k
    do {                                                                                                                           \
151
464k
        if (unlikely(!IS_PRINTABLE_ASCII(*buf)))                                                                                   \
152
464k
            goto NonPrintable;                                                                                                     \
153
464k
        ++buf;                                                                                                                     \
154
297k
    } while (0)
155
188k
        DOIT();
156
58.4k
        DOIT();
157
51.4k
        DOIT();
158
44.4k
        DOIT();
159
36.3k
        DOIT();
160
33.8k
        DOIT();
161
28.1k
        DOIT();
162
23.6k
        DOIT();
163
21.0k
#undef DOIT
164
21.0k
        continue;
165
167k
    NonPrintable:
166
167k
        if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
167
48.2k
            goto FOUND_CTL;
168
48.2k
        }
169
118k
        ++buf;
170
118k
    }
171
4.30k
#endif
172
7.81k
    for (;; ++buf) {
173
7.81k
        CHECK_EOF();
174
7.47k
        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
175
5.04k
            if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
176
3.96k
                goto FOUND_CTL;
177
3.96k
            }
178
5.04k
        }
179
7.47k
    }
180
52.2k
FOUND_CTL:
181
52.2k
    if (likely(*buf == '\015')) {
182
8.94k
        ++buf;
183
17.8k
        EXPECT_CHAR('\012');
184
17.8k
        *token_len = buf - 2 - token_start;
185
43.3k
    } else if (*buf == '\012') {
186
43.2k
        *token_len = buf - token_start;
187
43.2k
        ++buf;
188
43.2k
    } else {
189
76
        *ret = -1;
190
76
        return NULL;
191
76
    }
192
52.1k
    *token = token_start;
193
194
52.1k
    return buf;
195
52.2k
}
196
197
static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
198
1.44k
{
199
1.44k
    int ret_cnt = 0;
200
1.44k
    buf = last_len < 3 ? buf : buf + last_len - 3;
201
202
200k
    while (1) {
203
200k
        CHECK_EOF();
204
200k
        if (*buf == '\015') {
205
956
            ++buf;
206
956
            CHECK_EOF();
207
1.82k
            EXPECT_CHAR('\012');
208
1.82k
            ++ret_cnt;
209
199k
        } else if (*buf == '\012') {
210
5.45k
            ++buf;
211
5.45k
            ++ret_cnt;
212
193k
        } else {
213
193k
            ++buf;
214
193k
            ret_cnt = 0;
215
193k
        }
216
200k
        if (ret_cnt == 2) {
217
1.17k
            return buf;
218
1.17k
        }
219
200k
    }
220
221
0
    *ret = -2;
222
0
    return NULL;
223
1.44k
}
224
225
#define PARSE_INT(valp_, mul_)                                                                                                     \
226
20.7k
    if (*buf < '0' || '9' < *buf) {                                                                                                \
227
8
        buf++;                                                                                                                     \
228
8
        *ret = -1;                                                                                                                 \
229
8
        return NULL;                                                                                                               \
230
8
    }                                                                                                                              \
231
20.7k
    *(valp_) = (mul_) * (*buf++ - '0');
232
233
#define PARSE_INT_3(valp_)                                                                                                         \
234
2.87k
    do {                                                                                                                           \
235
2.87k
        int res_ = 0;                                                                                                              \
236
2.87k
        PARSE_INT(&res_, 100)                                                                                                      \
237
2.87k
        *valp_ = res_;                                                                                                             \
238
2.87k
        PARSE_INT(&res_, 10)                                                                                                       \
239
2.87k
        *valp_ += res_;                                                                                                            \
240
2.87k
        PARSE_INT(&res_, 1)                                                                                                        \
241
2.87k
        *valp_ += res_;                                                                                                            \
242
2.87k
    } while (0)
243
244
/* returned pointer is always within [buf, buf_end), or null */
245
static const char *parse_token(const char *buf, const char *buf_end, const char **token, size_t *token_len, char next_char,
246
                               int *ret)
247
63.8k
{
248
    /* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128
249
     * bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */
250
63.8k
    static const char ALIGNED(16) ranges[] = "\x00 "  /* control chars and up to SP */
251
63.8k
                                             "\"\""   /* 0x22 */
252
63.8k
                                             "()"     /* 0x28,0x29 */
253
63.8k
                                             ",,"     /* 0x2c */
254
63.8k
                                             "//"     /* 0x2f */
255
63.8k
                                             ":@"     /* 0x3a-0x40 */
256
63.8k
                                             "[]"     /* 0x5b-0x5d */
257
63.8k
                                             "{\xff"; /* 0x7b-0xff */
258
63.8k
    const char *buf_start = buf;
259
63.8k
    int found;
260
63.8k
    buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &found);
261
63.8k
    if (!found) {
262
63.8k
        CHECK_EOF();
263
63.5k
    }
264
774k
    while (1) {
265
774k
        if (*buf == next_char) {
266
62.9k
            break;
267
711k
        } else if (!token_char_map[(unsigned char)*buf]) {
268
315
            *ret = -1;
269
315
            return NULL;
270
315
        }
271
710k
        ++buf;
272
710k
        CHECK_EOF();
273
710k
    }
274
62.9k
    *token = buf_start;
275
62.9k
    *token_len = buf - buf_start;
276
62.9k
    return buf;
277
63.5k
}
278
279
/* returned pointer is always within [buf, buf_end), or null */
280
static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
281
16.7k
{
282
    /* we want at least [HTTP/1.<two chars>] to try to parse */
283
16.7k
    if (buf_end - buf < 9) {
284
111
        *ret = -2;
285
111
        return NULL;
286
111
    }
287
16.6k
    EXPECT_CHAR_NO_CHECK('H');
288
16.5k
    EXPECT_CHAR_NO_CHECK('T');
289
16.5k
    EXPECT_CHAR_NO_CHECK('T');
290
16.5k
    EXPECT_CHAR_NO_CHECK('P');
291
16.5k
    EXPECT_CHAR_NO_CHECK('/');
292
16.5k
    EXPECT_CHAR_NO_CHECK('1');
293
12.1k
    EXPECT_CHAR_NO_CHECK('.');
294
12.1k
    PARSE_INT(minor_version, 1);
295
12.1k
    return buf;
296
12.1k
}
297
298
static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
299
                                 size_t max_headers, int *ret)
300
12.1k
{
301
61.3k
    for (;; ++*num_headers) {
302
61.3k
        CHECK_EOF();
303
61.1k
        if (*buf == '\015') {
304
3.39k
            ++buf;
305
3.39k
            EXPECT_CHAR('\012');
306
3.35k
            break;
307
57.7k
        } else if (*buf == '\012') {
308
7.82k
            ++buf;
309
7.82k
            break;
310
7.82k
        }
311
49.9k
        if (*num_headers == max_headers) {
312
1
            *ret = -1;
313
1
            return NULL;
314
1
        }
315
49.9k
        if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
316
            /* parsing name, but do not discard SP before colon, see
317
             * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
318
48.4k
            if ((buf = parse_token(buf, buf_end, &headers[*num_headers].name, &headers[*num_headers].name_len, ':', ret)) == NULL) {
319
162
                return NULL;
320
162
            }
321
48.3k
            if (headers[*num_headers].name_len == 0) {
322
3
                *ret = -1;
323
3
                return NULL;
324
3
            }
325
48.3k
            ++buf;
326
55.0k
            for (;; ++buf) {
327
55.0k
                CHECK_EOF();
328
55.0k
                if (!(*buf == ' ' || *buf == '\t')) {
329
48.2k
                    break;
330
48.2k
                }
331
55.0k
            }
332
48.3k
        } else {
333
1.47k
            headers[*num_headers].name = NULL;
334
1.47k
            headers[*num_headers].name_len = 0;
335
1.47k
        }
336
49.7k
        const char *value;
337
49.7k
        size_t value_len;
338
49.7k
        if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) {
339
437
            return NULL;
340
437
        }
341
        /* remove trailing SPs and HTABs */
342
49.2k
        const char *value_end = value + value_len;
343
51.6k
        for (; value_end != value; --value_end) {
344
18.8k
            const char c = *(value_end - 1);
345
18.8k
            if (!(c == ' ' || c == '\t')) {
346
16.4k
                break;
347
16.4k
            }
348
18.8k
        }
349
49.2k
        headers[*num_headers].value = value;
350
49.2k
        headers[*num_headers].value_len = value_end - value;
351
49.2k
    }
352
11.1k
    return buf;
353
12.1k
}
354
355
static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
356
                                 size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
357
                                 size_t max_headers, int *ret)
358
15.3k
{
359
    /* skip first empty line (some clients add CRLF after POST content) */
360
15.3k
    CHECK_EOF();
361
15.3k
    if (*buf == '\015') {
362
99
        ++buf;
363
120
        EXPECT_CHAR('\012');
364
15.2k
    } else if (*buf == '\012') {
365
1.24k
        ++buf;
366
1.24k
    }
367
368
    /* parse request line */
369
15.3k
    if ((buf = parse_token(buf, buf_end, method, method_len, ' ', ret)) == NULL) {
370
751
        return NULL;
371
751
    }
372
14.8k
    do {
373
14.8k
        ++buf;
374
14.8k
        CHECK_EOF();
375
14.7k
    } while (*buf == ' ');
376
14.5k
    ADVANCE_TOKEN(*path, *path_len);
377
14.4k
    do {
378
14.4k
        ++buf;
379
14.4k
        CHECK_EOF();
380
14.4k
    } while (*buf == ' ');
381
14.2k
    if (*method_len == 0 || *path_len == 0) {
382
356
        *ret = -1;
383
356
        return NULL;
384
356
    }
385
13.8k
    if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
386
4.59k
        return NULL;
387
4.59k
    }
388
9.26k
    if (*buf == '\015') {
389
1.19k
        ++buf;
390
2.36k
        EXPECT_CHAR('\012');
391
8.07k
    } else if (*buf == '\012') {
392
8.05k
        ++buf;
393
8.05k
    } else {
394
23
        *ret = -1;
395
23
        return NULL;
396
23
    }
397
398
9.23k
    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
399
9.26k
}
400
401
int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
402
                      size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
403
15.6k
{
404
15.6k
    const char *buf = buf_start, *buf_end = buf_start + len;
405
15.6k
    size_t max_headers = *num_headers;
406
15.6k
    int r;
407
408
15.6k
    *method = NULL;
409
15.6k
    *method_len = 0;
410
15.6k
    *path = NULL;
411
15.6k
    *path_len = 0;
412
15.6k
    *minor_version = -1;
413
15.6k
    *num_headers = 0;
414
415
    /* if last_len != 0, check if the request is complete (a fast countermeasure
416
       againt slowloris */
417
15.6k
    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
418
268
        return r;
419
268
    }
420
421
15.3k
    if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
422
15.3k
                             &r)) == NULL) {
423
7.08k
        return r;
424
7.08k
    }
425
426
8.30k
    return (int)(buf - buf_start);
427
15.3k
}
428
429
static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
430
                                  size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
431
2.87k
{
432
    /* parse "HTTP/1.x" */
433
2.87k
    if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
434
0
        return NULL;
435
0
    }
436
    /* skip space */
437
2.87k
    if (*buf != ' ') {
438
0
        *ret = -1;
439
0
        return NULL;
440
0
    }
441
2.87k
    do {
442
2.87k
        ++buf;
443
2.87k
        CHECK_EOF();
444
2.87k
    } while (*buf == ' ');
445
    /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
446
2.87k
    if (buf_end - buf < 4) {
447
0
        *ret = -2;
448
0
        return NULL;
449
0
    }
450
2.87k
    PARSE_INT_3(status);
451
452
    /* get message including preceding space */
453
2.87k
    if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
454
0
        return NULL;
455
0
    }
456
2.87k
    if (*msg_len == 0) {
457
        /* ok */
458
2.87k
    } else if (**msg == ' ') {
459
        /* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP
460
         * before running past the end of the given buffer. */
461
2.87k
        do {
462
2.87k
            ++*msg;
463
2.87k
            --*msg_len;
464
2.87k
        } while (**msg == ' ');
465
2.87k
    } else {
466
        /* garbage found after status code */
467
0
        *ret = -1;
468
0
        return NULL;
469
0
    }
470
471
2.87k
    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
472
2.87k
}
473
474
int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
475
                       struct phr_header *headers, size_t *num_headers, size_t last_len)
476
2.87k
{
477
2.87k
    const char *buf = buf_start, *buf_end = buf + len;
478
2.87k
    size_t max_headers = *num_headers;
479
2.87k
    int r;
480
481
2.87k
    *minor_version = -1;
482
2.87k
    *status = 0;
483
2.87k
    *msg = NULL;
484
2.87k
    *msg_len = 0;
485
2.87k
    *num_headers = 0;
486
487
    /* if last_len != 0, check if the response is complete (a fast countermeasure
488
       against slowloris */
489
2.87k
    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
490
0
        return r;
491
0
    }
492
493
2.87k
    if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
494
0
        return r;
495
0
    }
496
497
2.87k
    return (int)(buf - buf_start);
498
2.87k
}
499
500
int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
501
0
{
502
0
    const char *buf = buf_start, *buf_end = buf + len;
503
0
    size_t max_headers = *num_headers;
504
0
    int r;
505
506
0
    *num_headers = 0;
507
508
    /* if last_len != 0, check if the response is complete (a fast countermeasure
509
       against slowloris */
510
0
    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
511
0
        return r;
512
0
    }
513
514
0
    if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
515
0
        return r;
516
0
    }
517
518
0
    return (int)(buf - buf_start);
519
0
}
520
521
enum {
522
    CHUNKED_IN_CHUNK_SIZE,
523
    CHUNKED_IN_CHUNK_EXT,
524
    CHUNKED_IN_CHUNK_DATA,
525
    CHUNKED_IN_CHUNK_CRLF,
526
    CHUNKED_IN_TRAILERS_LINE_HEAD,
527
    CHUNKED_IN_TRAILERS_LINE_MIDDLE
528
};
529
530
static int decode_hex(int ch)
531
5.65k
{
532
5.65k
    if ('0' <= ch && ch <= '9') {
533
1.66k
        return ch - '0';
534
3.98k
    } else if ('A' <= ch && ch <= 'F') {
535
981
        return ch - 'A' + 0xa;
536
3.00k
    } else if ('a' <= ch && ch <= 'f') {
537
866
        return ch - 'a' + 0xa;
538
2.13k
    } else {
539
2.13k
        return -1;
540
2.13k
    }
541
5.65k
}
542
543
ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
544
1.11k
{
545
1.11k
    size_t dst = 0, src = 0, bufsz = *_bufsz;
546
1.11k
    ssize_t ret = -2; /* incomplete */
547
548
1.11k
    decoder->_total_read += bufsz;
549
550
4.82k
    while (1) {
551
4.82k
        switch (decoder->_state) {
552
2.23k
        case CHUNKED_IN_CHUNK_SIZE:
553
5.74k
            for (;; ++src) {
554
5.74k
                int v;
555
5.74k
                if (src == bufsz)
556
94
                    goto Exit;
557
5.65k
                if ((v = decode_hex(buf[src])) == -1) {
558
2.13k
                    if (decoder->_hex_count == 0) {
559
18
                        ret = -1;
560
18
                        goto Exit;
561
18
                    }
562
                    /* the only characters that may appear after the chunk size are BWS, semicolon, or CRLF */
563
2.11k
                    switch (buf[src]) {
564
539
                    case ' ':
565
560
                    case '\011':
566
632
                    case ';':
567
1.87k
                    case '\012':
568
2.10k
                    case '\015':
569
2.10k
                        break;
570
9
                    default:
571
9
                        ret = -1;
572
9
                        goto Exit;
573
2.11k
                    }
574
2.10k
                    break;
575
2.11k
                }
576
3.51k
                if (decoder->_hex_count == sizeof(size_t) * 2) {
577
1
                    ret = -1;
578
1
                    goto Exit;
579
1
                }
580
3.51k
                decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
581
3.51k
                ++decoder->_hex_count;
582
3.51k
            }
583
2.10k
            decoder->_hex_count = 0;
584
2.10k
            decoder->_state = CHUNKED_IN_CHUNK_EXT;
585
        /* fallthru */
586
2.18k
        case CHUNKED_IN_CHUNK_EXT:
587
            /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
588
38.0k
            for (;; ++src) {
589
38.0k
                if (src == bufsz)
590
102
                    goto Exit;
591
37.9k
                if (buf[src] == '\012')
592
2.08k
                    break;
593
37.9k
            }
594
2.08k
            ++src;
595
2.08k
            if (decoder->bytes_left_in_chunk == 0) {
596
289
                if (decoder->consume_trailer) {
597
289
                    decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
598
289
                    break;
599
289
                } else {
600
0
                    goto Complete;
601
0
                }
602
289
            }
603
1.79k
            decoder->_state = CHUNKED_IN_CHUNK_DATA;
604
        /* fallthru */
605
1.92k
        case CHUNKED_IN_CHUNK_DATA: {
606
1.92k
            size_t avail = bufsz - src;
607
1.92k
            if (avail < decoder->bytes_left_in_chunk) {
608
280
                if (dst != src)
609
226
                    memmove(buf + dst, buf + src, avail);
610
280
                src += avail;
611
280
                dst += avail;
612
280
                decoder->bytes_left_in_chunk -= avail;
613
280
                goto Exit;
614
280
            }
615
1.64k
            if (dst != src)
616
1.56k
                memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
617
1.64k
            src += decoder->bytes_left_in_chunk;
618
1.64k
            dst += decoder->bytes_left_in_chunk;
619
1.64k
            decoder->bytes_left_in_chunk = 0;
620
1.64k
            decoder->_state = CHUNKED_IN_CHUNK_CRLF;
621
1.64k
        }
622
        /* fallthru */
623
1.73k
        case CHUNKED_IN_CHUNK_CRLF:
624
1.94k
            for (;; ++src) {
625
1.94k
                if (src == bufsz)
626
123
                    goto Exit;
627
1.81k
                if (buf[src] != '\015')
628
1.61k
                    break;
629
1.81k
            }
630
1.61k
            if (buf[src] != '\012') {
631
25
                ret = -1;
632
25
                goto Exit;
633
25
            }
634
1.58k
            ++src;
635
1.58k
            decoder->_state = CHUNKED_IN_CHUNK_SIZE;
636
1.58k
            break;
637
2.20k
        case CHUNKED_IN_TRAILERS_LINE_HEAD:
638
2.48k
            for (;; ++src) {
639
2.48k
                if (src == bufsz)
640
102
                    goto Exit;
641
2.37k
                if (buf[src] != '\015')
642
2.10k
                    break;
643
2.37k
            }
644
2.10k
            if (buf[src++] == '\012')
645
214
                goto Complete;
646
1.88k
            decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
647
        /* fallthru */
648
1.98k
        case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
649
32.0k
            for (;; ++src) {
650
32.0k
                if (src == bufsz)
651
142
                    goto Exit;
652
31.9k
                if (buf[src] == '\012')
653
1.83k
                    break;
654
31.9k
            }
655
1.83k
            ++src;
656
1.83k
            decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
657
1.83k
            break;
658
0
        default:
659
0
            assert(!"decoder is corrupt");
660
4.82k
        }
661
4.82k
    }
662
663
214
Complete:
664
214
    ret = bufsz - src;
665
1.11k
Exit:
666
1.11k
    if (dst != src)
667
1.03k
        memmove(buf + dst, buf + src, bufsz - src);
668
1.11k
    *_bufsz = dst;
669
    /* if incomplete but the overhead of the chunked encoding is >=100KB and >80%, signal an error */
670
1.11k
    if (ret == -2) {
671
843
        decoder->_total_overhead += bufsz - dst;
672
843
        if (decoder->_total_overhead >= 100 * 1024 && decoder->_total_read - decoder->_total_overhead < decoder->_total_read / 4)
673
0
            ret = -1;
674
843
    }
675
1.11k
    return ret;
676
214
}
677
678
int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
679
0
{
680
0
    return decoder->_state == CHUNKED_IN_CHUNK_DATA;
681
0
}
682
683
#undef CHECK_EOF
684
#undef EXPECT_CHAR
685
#undef ADVANCE_TOKEN