Coverage Report

Created: 2025-07-18 06:42

/src/h2o/deps/picohttpparser/picohttpparser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
3
 *                         Shigeo Mitsunari
4
 *
5
 * The software is licensed under either the MIT License (below) or the Perl
6
 * license.
7
 *
8
 * Permission is hereby granted, free of charge, to any person obtaining a copy
9
 * of this software and associated documentation files (the "Software"), to
10
 * deal in the Software without restriction, including without limitation the
11
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
12
 * sell copies of the Software, and to permit persons to whom the Software is
13
 * furnished to do so, subject to the following conditions:
14
 *
15
 * The above copyright notice and this permission notice shall be included in
16
 * all copies or substantial portions of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24
 * IN THE SOFTWARE.
25
 */
26
27
#include <assert.h>
28
#include <stddef.h>
29
#include <string.h>
30
#ifdef __SSE4_2__
31
#ifdef _MSC_VER
32
#include <nmmintrin.h>
33
#else
34
#include <x86intrin.h>
35
#endif
36
#endif
37
#include "picohttpparser.h"
38
39
#if __GNUC__ >= 3
40
745k
#define likely(x) __builtin_expect(!!(x), 1)
41
1.54M
#define unlikely(x) __builtin_expect(!!(x), 0)
42
#else
43
#define likely(x) (x)
44
#define unlikely(x) (x)
45
#endif
46
47
#ifdef _MSC_VER
48
#define ALIGNED(n) _declspec(align(n))
49
#else
50
#define ALIGNED(n) __attribute__((aligned(n)))
51
#endif
52
53
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
54
55
#define CHECK_EOF()                                                                                                                \
56
2.11M
    if (buf == buf_end) {                                                                                                          \
57
2.02k
        *ret = -2;                                                                                                                 \
58
2.02k
        return NULL;                                                                                                               \
59
2.02k
    }
60
61
#define EXPECT_CHAR_NO_CHECK(ch)                                                                                                   \
62
126k
    if (*buf++ != ch) {                                                                                                            \
63
4.60k
        *ret = -1;                                                                                                                 \
64
4.60k
        return NULL;                                                                                                               \
65
4.60k
    }
66
67
#define EXPECT_CHAR(ch)                                                                                                            \
68
16.6k
    CHECK_EOF();                                                                                                                   \
69
16.4k
    EXPECT_CHAR_NO_CHECK(ch);
70
71
#define ADVANCE_TOKEN(tok, toklen)                                                                                                 \
72
14.8k
    do {                                                                                                                           \
73
14.8k
        const char *tok_start = buf;                                                                                               \
74
14.8k
        static const char ALIGNED(16) ranges2[16] = "\000\040\177\177";                                                            \
75
14.8k
        int found2;                                                                                                                \
76
14.8k
        buf = findchar_fast(buf, buf_end, ranges2, 4, &found2);                                                                    \
77
14.8k
        if (!found2) {                                                                                                             \
78
14.8k
            CHECK_EOF();                                                                                                           \
79
14.8k
        }                                                                                                                          \
80
810k
        while (1) {                                                                                                                \
81
810k
            if (*buf == ' ') {                                                                                                     \
82
14.6k
                break;                                                                                                             \
83
796k
            } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {                                                                      \
84
200k
                if ((unsigned char)*buf < '\040' || *buf == '\177') {                                                              \
85
123
                    *ret = -1;                                                                                                     \
86
123
                    return NULL;                                                                                                   \
87
123
                }                                                                                                                  \
88
200k
            }                                                                                                                      \
89
810k
            ++buf;                                                                                                                 \
90
796k
            CHECK_EOF();                                                                                                           \
91
795k
        }                                                                                                                          \
92
14.8k
        tok = tok_start;                                                                                                           \
93
14.6k
        toklen = buf - tok_start;                                                                                                  \
94
14.6k
    } while (0)
95
96
static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
97
                                    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
98
                                    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
99
                                    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
100
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
101
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
102
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
103
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
104
105
static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
106
88.8k
{
107
88.8k
    *found = 0;
108
#if __SSE4_2__
109
    if (likely(buf_end - buf >= 16)) {
110
        __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
111
112
        size_t left = (buf_end - buf) & ~15;
113
        do {
114
            __m128i b16 = _mm_loadu_si128((const __m128i *)buf);
115
            int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
116
            if (unlikely(r != 16)) {
117
                buf += r;
118
                *found = 1;
119
                break;
120
            }
121
            buf += 16;
122
            left -= 16;
123
        } while (likely(left != 0));
124
    }
125
#else
126
    /* suppress unused parameter warning */
127
88.8k
    (void)buf_end;
128
88.8k
    (void)ranges;
129
88.8k
    (void)ranges_size;
130
88.8k
#endif
131
88.8k
    return buf;
132
88.8k
}
133
134
static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
135
62.0k
{
136
62.0k
    const char *token_start = buf;
137
138
#ifdef __SSE4_2__
139
    static const char ALIGNED(16) ranges1[16] = "\0\010"    /* allow HT */
140
                                                "\012\037"  /* allow SP and up to but not including DEL */
141
                                                "\177\177"; /* allow chars w. MSB set */
142
    int found;
143
    buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
144
    if (found)
145
        goto FOUND_CTL;
146
#else
147
    /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
148
227k
    while (likely(buf_end - buf >= 8)) {
149
222k
#define DOIT()                                                                                                                     \
150
604k
    do {                                                                                                                           \
151
604k
        if (unlikely(!IS_PRINTABLE_ASCII(*buf)))                                                                                   \
152
604k
            goto NonPrintable;                                                                                                     \
153
604k
        ++buf;                                                                                                                     \
154
414k
    } while (0)
155
222k
        DOIT();
156
75.0k
        DOIT();
157
67.2k
        DOIT();
158
59.9k
        DOIT();
159
51.7k
        DOIT();
160
48.5k
        DOIT();
161
42.3k
        DOIT();
162
36.8k
        DOIT();
163
32.8k
#undef DOIT
164
32.8k
        continue;
165
190k
    NonPrintable:
166
190k
        if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
167
57.8k
            goto FOUND_CTL;
168
57.8k
        }
169
132k
        ++buf;
170
132k
    }
171
4.21k
#endif
172
7.97k
    for (;; ++buf) {
173
7.97k
        CHECK_EOF();
174
7.68k
        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
175
4.88k
            if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
176
3.91k
                goto FOUND_CTL;
177
3.91k
            }
178
4.88k
        }
179
7.68k
    }
180
61.7k
FOUND_CTL:
181
61.7k
    if (likely(*buf == '\015')) {
182
10.3k
        ++buf;
183
20.5k
        EXPECT_CHAR('\012');
184
20.5k
        *token_len = buf - 2 - token_start;
185
51.3k
    } else if (*buf == '\012') {
186
51.3k
        *token_len = buf - token_start;
187
51.3k
        ++buf;
188
51.3k
    } else {
189
65
        *ret = -1;
190
65
        return NULL;
191
65
    }
192
61.6k
    *token = token_start;
193
194
61.6k
    return buf;
195
61.7k
}
196
197
static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
198
1.66k
{
199
1.66k
    int ret_cnt = 0;
200
1.66k
    buf = last_len < 3 ? buf : buf + last_len - 3;
201
202
272k
    while (1) {
203
272k
        CHECK_EOF();
204
272k
        if (*buf == '\015') {
205
1.48k
            ++buf;
206
1.48k
            CHECK_EOF();
207
2.84k
            EXPECT_CHAR('\012');
208
2.84k
            ++ret_cnt;
209
271k
        } else if (*buf == '\012') {
210
8.65k
            ++buf;
211
8.65k
            ++ret_cnt;
212
262k
        } else {
213
262k
            ++buf;
214
262k
            ret_cnt = 0;
215
262k
        }
216
272k
        if (ret_cnt == 2) {
217
1.30k
            return buf;
218
1.30k
        }
219
272k
    }
220
221
0
    *ret = -2;
222
0
    return NULL;
223
1.66k
}
224
225
#define PARSE_INT(valp_, mul_)                                                                                                     \
226
18.6k
    if (*buf < '0' || '9' < *buf) {                                                                                                \
227
15
        buf++;                                                                                                                     \
228
15
        *ret = -1;                                                                                                                 \
229
15
        return NULL;                                                                                                               \
230
15
    }                                                                                                                              \
231
18.6k
    *(valp_) = (mul_) * (*buf++ - '0');
232
233
#define PARSE_INT_3(valp_)                                                                                                         \
234
2.28k
    do {                                                                                                                           \
235
2.28k
        int res_ = 0;                                                                                                              \
236
2.28k
        PARSE_INT(&res_, 100)                                                                                                      \
237
2.28k
        *valp_ = res_;                                                                                                             \
238
2.28k
        PARSE_INT(&res_, 10)                                                                                                       \
239
2.28k
        *valp_ += res_;                                                                                                            \
240
2.28k
        PARSE_INT(&res_, 1)                                                                                                        \
241
2.28k
        *valp_ += res_;                                                                                                            \
242
2.28k
    } while (0)
243
244
/* returned pointer is always within [buf, buf_end), or null */
245
static const char *parse_token(const char *buf, const char *buf_end, const char **token, size_t *token_len, char next_char,
246
                               int *ret)
247
74.0k
{
248
    /* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128
249
     * bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */
250
74.0k
    static const char ALIGNED(16) ranges[] = "\x00 "  /* control chars and up to SP */
251
74.0k
                                             "\"\""   /* 0x22 */
252
74.0k
                                             "()"     /* 0x28,0x29 */
253
74.0k
                                             ",,"     /* 0x2c */
254
74.0k
                                             "//"     /* 0x2f */
255
74.0k
                                             ":@"     /* 0x3a-0x40 */
256
74.0k
                                             "[]"     /* 0x5b-0x5d */
257
74.0k
                                             "{\xff"; /* 0x7b-0xff */
258
74.0k
    const char *buf_start = buf;
259
74.0k
    int found;
260
74.0k
    buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &found);
261
74.0k
    if (!found) {
262
74.0k
        CHECK_EOF();
263
73.8k
    }
264
820k
    while (1) {
265
820k
        if (*buf == next_char) {
266
73.0k
            break;
267
747k
        } else if (!token_char_map[(unsigned char)*buf]) {
268
360
            *ret = -1;
269
360
            return NULL;
270
360
        }
271
747k
        ++buf;
272
747k
        CHECK_EOF();
273
746k
    }
274
73.0k
    *token = buf_start;
275
73.0k
    *token_len = buf - buf_start;
276
73.0k
    return buf;
277
73.8k
}
278
279
/* returned pointer is always within [buf, buf_end), or null */
280
static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
281
16.4k
{
282
    /* we want at least [HTTP/1.<two chars>] to try to parse */
283
16.4k
    if (buf_end - buf < 9) {
284
113
        *ret = -2;
285
113
        return NULL;
286
113
    }
287
16.3k
    EXPECT_CHAR_NO_CHECK('H');
288
16.3k
    EXPECT_CHAR_NO_CHECK('T');
289
16.3k
    EXPECT_CHAR_NO_CHECK('T');
290
16.3k
    EXPECT_CHAR_NO_CHECK('P');
291
16.2k
    EXPECT_CHAR_NO_CHECK('/');
292
16.2k
    EXPECT_CHAR_NO_CHECK('1');
293
11.8k
    EXPECT_CHAR_NO_CHECK('.');
294
11.8k
    PARSE_INT(minor_version, 1);
295
11.8k
    return buf;
296
11.8k
}
297
298
static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
299
                                 size_t max_headers, int *ret)
300
11.7k
{
301
71.0k
    for (;; ++*num_headers) {
302
71.0k
        CHECK_EOF();
303
70.8k
        if (*buf == '\015') {
304
3.13k
            ++buf;
305
3.13k
            EXPECT_CHAR('\012');
306
3.09k
            break;
307
67.7k
        } else if (*buf == '\012') {
308
7.65k
            ++buf;
309
7.65k
            break;
310
7.65k
        }
311
60.0k
        if (*num_headers == max_headers) {
312
1
            *ret = -1;
313
1
            return NULL;
314
1
        }
315
60.0k
        if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
316
            /* parsing name, but do not discard SP before colon, see
317
             * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
318
58.3k
            if ((buf = parse_token(buf, buf_end, &headers[*num_headers].name, &headers[*num_headers].name_len, ':', ret)) == NULL) {
319
227
                return NULL;
320
227
            }
321
58.1k
            if (headers[*num_headers].name_len == 0) {
322
6
                *ret = -1;
323
6
                return NULL;
324
6
            }
325
58.1k
            ++buf;
326
66.1k
            for (;; ++buf) {
327
66.1k
                CHECK_EOF();
328
66.0k
                if (!(*buf == ' ' || *buf == '\t')) {
329
58.0k
                    break;
330
58.0k
                }
331
66.0k
            }
332
58.1k
        } else {
333
1.71k
            headers[*num_headers].name = NULL;
334
1.71k
            headers[*num_headers].name_len = 0;
335
1.71k
        }
336
59.7k
        const char *value;
337
59.7k
        size_t value_len;
338
59.7k
        if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) {
339
408
            return NULL;
340
408
        }
341
        /* remove trailing SPs and HTABs */
342
59.3k
        const char *value_end = value + value_len;
343
61.9k
        for (; value_end != value; --value_end) {
344
23.5k
            const char c = *(value_end - 1);
345
23.5k
            if (!(c == ' ' || c == '\t')) {
346
21.0k
                break;
347
21.0k
            }
348
23.5k
        }
349
59.3k
        headers[*num_headers].value = value;
350
59.3k
        headers[*num_headers].value_len = value_end - value;
351
59.3k
    }
352
10.7k
    return buf;
353
11.7k
}
354
355
static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
356
                                 size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
357
                                 size_t max_headers, int *ret)
358
15.7k
{
359
    /* skip first empty line (some clients add CRLF after POST content) */
360
15.7k
    CHECK_EOF();
361
15.7k
    if (*buf == '\015') {
362
172
        ++buf;
363
197
        EXPECT_CHAR('\012');
364
15.5k
    } else if (*buf == '\012') {
365
1.35k
        ++buf;
366
1.35k
    }
367
368
    /* parse request line */
369
15.6k
    if ((buf = parse_token(buf, buf_end, method, method_len, ' ', ret)) == NULL) {
370
791
        return NULL;
371
791
    }
372
15.1k
    do {
373
15.1k
        ++buf;
374
15.1k
        CHECK_EOF();
375
15.0k
    } while (*buf == ' ');
376
14.8k
    ADVANCE_TOKEN(*path, *path_len);
377
14.8k
    do {
378
14.8k
        ++buf;
379
14.8k
        CHECK_EOF();
380
14.7k
    } while (*buf == ' ');
381
14.5k
    if (*method_len == 0 || *path_len == 0) {
382
332
        *ret = -1;
383
332
        return NULL;
384
332
    }
385
14.1k
    if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
386
4.67k
        return NULL;
387
4.67k
    }
388
9.51k
    if (*buf == '\015') {
389
1.57k
        ++buf;
390
3.07k
        EXPECT_CHAR('\012');
391
7.94k
    } else if (*buf == '\012') {
392
7.93k
        ++buf;
393
7.93k
    } else {
394
12
        *ret = -1;
395
12
        return NULL;
396
12
    }
397
398
9.46k
    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
399
9.51k
}
400
401
int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
402
                      size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
403
16.1k
{
404
16.1k
    const char *buf = buf_start, *buf_end = buf_start + len;
405
16.1k
    size_t max_headers = *num_headers;
406
16.1k
    int r;
407
408
16.1k
    *method = NULL;
409
16.1k
    *method_len = 0;
410
16.1k
    *path = NULL;
411
16.1k
    *path_len = 0;
412
16.1k
    *minor_version = -1;
413
16.1k
    *num_headers = 0;
414
415
    /* if last_len != 0, check if the request is complete (a fast countermeasure
416
       againt slowloris */
417
16.1k
    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
418
363
        return r;
419
363
    }
420
421
15.7k
    if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
422
15.7k
                             &r)) == NULL) {
423
7.29k
        return r;
424
7.29k
    }
425
426
8.46k
    return (int)(buf - buf_start);
427
15.7k
}
428
429
static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
430
                                  size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
431
2.28k
{
432
    /* parse "HTTP/1.x" */
433
2.28k
    if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
434
0
        return NULL;
435
0
    }
436
    /* skip space */
437
2.28k
    if (*buf != ' ') {
438
0
        *ret = -1;
439
0
        return NULL;
440
0
    }
441
2.28k
    do {
442
2.28k
        ++buf;
443
2.28k
        CHECK_EOF();
444
2.28k
    } while (*buf == ' ');
445
    /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
446
2.28k
    if (buf_end - buf < 4) {
447
0
        *ret = -2;
448
0
        return NULL;
449
0
    }
450
2.28k
    PARSE_INT_3(status);
451
452
    /* get message including preceding space */
453
2.28k
    if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
454
0
        return NULL;
455
0
    }
456
2.28k
    if (*msg_len == 0) {
457
        /* ok */
458
2.28k
    } else if (**msg == ' ') {
459
        /* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP
460
         * before running past the end of the given buffer. */
461
2.28k
        do {
462
2.28k
            ++*msg;
463
2.28k
            --*msg_len;
464
2.28k
        } while (**msg == ' ');
465
2.28k
    } else {
466
        /* garbage found after status code */
467
0
        *ret = -1;
468
0
        return NULL;
469
0
    }
470
471
2.28k
    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
472
2.28k
}
473
474
int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
475
                       struct phr_header *headers, size_t *num_headers, size_t last_len)
476
2.28k
{
477
2.28k
    const char *buf = buf_start, *buf_end = buf + len;
478
2.28k
    size_t max_headers = *num_headers;
479
2.28k
    int r;
480
481
2.28k
    *minor_version = -1;
482
2.28k
    *status = 0;
483
2.28k
    *msg = NULL;
484
2.28k
    *msg_len = 0;
485
2.28k
    *num_headers = 0;
486
487
    /* if last_len != 0, check if the response is complete (a fast countermeasure
488
       against slowloris */
489
2.28k
    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
490
0
        return r;
491
0
    }
492
493
2.28k
    if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
494
0
        return r;
495
0
    }
496
497
2.28k
    return (int)(buf - buf_start);
498
2.28k
}
499
500
int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
501
0
{
502
0
    const char *buf = buf_start, *buf_end = buf + len;
503
0
    size_t max_headers = *num_headers;
504
0
    int r;
505
506
0
    *num_headers = 0;
507
508
    /* if last_len != 0, check if the response is complete (a fast countermeasure
509
       against slowloris */
510
0
    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
511
0
        return r;
512
0
    }
513
514
0
    if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
515
0
        return r;
516
0
    }
517
518
0
    return (int)(buf - buf_start);
519
0
}
520
521
enum {
522
    CHUNKED_IN_CHUNK_SIZE,
523
    CHUNKED_IN_CHUNK_EXT,
524
    CHUNKED_IN_CHUNK_HEADER_EXPECT_LF,
525
    CHUNKED_IN_CHUNK_DATA,
526
    CHUNKED_IN_CHUNK_DATA_EXPECT_CR,
527
    CHUNKED_IN_CHUNK_DATA_EXPECT_LF,
528
    CHUNKED_IN_TRAILERS_LINE_HEAD,
529
    CHUNKED_IN_TRAILERS_LINE_MIDDLE
530
};
531
532
static int decode_hex(int ch)
533
6.69k
{
534
6.69k
    if ('0' <= ch && ch <= '9') {
535
786
        return ch - '0';
536
5.91k
    } else if ('A' <= ch && ch <= 'F') {
537
621
        return ch - 'A' + 0xa;
538
5.28k
    } else if ('a' <= ch && ch <= 'f') {
539
2.50k
        return ch - 'a' + 0xa;
540
2.78k
    } else {
541
2.78k
        return -1;
542
2.78k
    }
543
6.69k
}
544
545
ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
546
1.01k
{
547
1.01k
    size_t dst = 0, src = 0, bufsz = *_bufsz;
548
1.01k
    ssize_t ret = -2; /* incomplete */
549
550
1.01k
    decoder->_total_read += bufsz;
551
552
6.80k
    while (1) {
553
6.80k
        switch (decoder->_state) {
554
2.89k
        case CHUNKED_IN_CHUNK_SIZE:
555
6.80k
            for (;; ++src) {
556
6.80k
                int v;
557
6.80k
                if (src == bufsz)
558
110
                    goto Exit;
559
6.69k
                if ((v = decode_hex(buf[src])) == -1) {
560
2.78k
                    if (decoder->_hex_count == 0) {
561
12
                        ret = -1;
562
12
                        goto Exit;
563
12
                    }
564
                    /* the only characters that may appear after the chunk size are BWS, semicolon, or CRLF */
565
2.77k
                    switch (buf[src]) {
566
2.18k
                    case ' ':
567
2.20k
                    case '\011':
568
2.22k
                    case ';':
569
2.24k
                    case '\012':
570
2.75k
                    case '\015':
571
2.75k
                        break;
572
19
                    default:
573
19
                        ret = -1;
574
19
                        goto Exit;
575
2.77k
                    }
576
2.75k
                    break;
577
2.77k
                }
578
3.91k
                if (decoder->_hex_count == sizeof(size_t) * 2) {
579
4
                    ret = -1;
580
4
                    goto Exit;
581
4
                }
582
3.90k
                decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
583
3.90k
                ++decoder->_hex_count;
584
3.90k
            }
585
2.75k
            decoder->_hex_count = 0;
586
2.75k
            decoder->_state = CHUNKED_IN_CHUNK_EXT;
587
        /* fallthru */
588
2.80k
        case CHUNKED_IN_CHUNK_EXT:
589
            /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
590
61.8k
            for (;; ++src) {
591
61.8k
                if (src == bufsz)
592
64
                    goto Exit;
593
61.7k
                if (buf[src] == '\015') {
594
2.71k
                    break;
595
59.0k
                } else if (buf[src] == '\012') {
596
28
                    ret = -1;
597
28
                    goto Exit;
598
28
                }
599
61.7k
            }
600
2.71k
            ++src;
601
2.71k
            decoder->_state = CHUNKED_IN_CHUNK_HEADER_EXPECT_LF;
602
        /* fallthru */
603
2.72k
        case CHUNKED_IN_CHUNK_HEADER_EXPECT_LF:
604
2.72k
            if (src == bufsz)
605
21
                goto Exit;
606
2.70k
            if (buf[src] != '\012') {
607
11
                ret = -1;
608
11
                goto Exit;
609
11
            }
610
2.69k
            ++src;
611
2.69k
            if (decoder->bytes_left_in_chunk == 0) {
612
201
                if (decoder->consume_trailer) {
613
201
                    decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
614
201
                    break;
615
201
                } else {
616
0
                    goto Complete;
617
0
                }
618
201
            }
619
2.49k
            decoder->_state = CHUNKED_IN_CHUNK_DATA;
620
        /* fallthru */
621
2.61k
        case CHUNKED_IN_CHUNK_DATA: {
622
2.61k
            size_t avail = bufsz - src;
623
2.61k
            if (avail < decoder->bytes_left_in_chunk) {
624
217
                if (dst != src)
625
134
                    memmove(buf + dst, buf + src, avail);
626
217
                src += avail;
627
217
                dst += avail;
628
217
                decoder->bytes_left_in_chunk -= avail;
629
217
                goto Exit;
630
217
            }
631
2.40k
            if (dst != src)
632
2.36k
                memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
633
2.40k
            src += decoder->bytes_left_in_chunk;
634
2.40k
            dst += decoder->bytes_left_in_chunk;
635
2.40k
            decoder->bytes_left_in_chunk = 0;
636
2.40k
            decoder->_state = CHUNKED_IN_CHUNK_DATA_EXPECT_CR;
637
2.40k
        }
638
        /* fallthru */
639
2.40k
        case CHUNKED_IN_CHUNK_DATA_EXPECT_CR:
640
2.40k
            if (src == bufsz)
641
5
                goto Exit;
642
2.40k
            if (buf[src] != '\015') {
643
49
                ret = -1;
644
49
                goto Exit;
645
49
            }
646
2.35k
            ++src;
647
2.35k
            decoder->_state = CHUNKED_IN_CHUNK_DATA_EXPECT_LF;
648
        /* fallthru */
649
2.35k
        case CHUNKED_IN_CHUNK_DATA_EXPECT_LF:
650
2.35k
            if (src == bufsz)
651
4
                goto Exit;
652
2.35k
            if (buf[src] != '\012') {
653
9
                ret = -1;
654
9
                goto Exit;
655
9
            }
656
2.34k
            ++src;
657
2.34k
            decoder->_state = CHUNKED_IN_CHUNK_SIZE;
658
2.34k
            break;
659
3.49k
        case CHUNKED_IN_TRAILERS_LINE_HEAD:
660
3.91k
            for (;; ++src) {
661
3.91k
                if (src == bufsz)
662
61
                    goto Exit;
663
3.85k
                if (buf[src] != '\015')
664
3.43k
                    break;
665
3.85k
            }
666
3.43k
            if (buf[src++] == '\012')
667
130
                goto Complete;
668
3.30k
            decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
669
        /* fallthru */
670
3.51k
        case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
671
59.7k
            for (;; ++src) {
672
59.7k
                if (src == bufsz)
673
271
                    goto Exit;
674
59.5k
                if (buf[src] == '\012')
675
3.24k
                    break;
676
59.5k
            }
677
3.24k
            ++src;
678
3.24k
            decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
679
3.24k
            break;
680
0
        default:
681
0
            assert(!"decoder is corrupt");
682
6.80k
        }
683
6.80k
    }
684
685
130
Complete:
686
130
    ret = bufsz - src;
687
1.01k
Exit:
688
1.01k
    if (dst != src)
689
917
        memmove(buf + dst, buf + src, bufsz - src);
690
1.01k
    *_bufsz = dst;
691
    /* if incomplete but the overhead of the chunked encoding is >=100KB and >80%, signal an error */
692
1.01k
    if (ret == -2) {
693
753
        decoder->_total_overhead += bufsz - dst;
694
753
        if (decoder->_total_overhead >= 100 * 1024 && decoder->_total_read - decoder->_total_overhead < decoder->_total_read / 4)
695
0
            ret = -1;
696
753
    }
697
1.01k
    return ret;
698
130
}
699
700
int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
701
0
{
702
0
    return decoder->_state == CHUNKED_IN_CHUNK_DATA;
703
0
}
704
705
#undef CHECK_EOF
706
#undef EXPECT_CHAR
707
#undef ADVANCE_TOKEN