Coverage Report

Created: 2025-07-11 06:24

/src/h2o/deps/picohttpparser/picohttpparser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
3
 *                         Shigeo Mitsunari
4
 *
5
 * The software is licensed under either the MIT License (below) or the Perl
6
 * license.
7
 *
8
 * Permission is hereby granted, free of charge, to any person obtaining a copy
9
 * of this software and associated documentation files (the "Software"), to
10
 * deal in the Software without restriction, including without limitation the
11
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
12
 * sell copies of the Software, and to permit persons to whom the Software is
13
 * furnished to do so, subject to the following conditions:
14
 *
15
 * The above copyright notice and this permission notice shall be included in
16
 * all copies or substantial portions of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24
 * IN THE SOFTWARE.
25
 */
26
27
#include <assert.h>
28
#include <stddef.h>
29
#include <string.h>
30
#ifdef __SSE4_2__
31
#ifdef _MSC_VER
32
#include <nmmintrin.h>
33
#else
34
#include <x86intrin.h>
35
#endif
36
#endif
37
#include "picohttpparser.h"
38
39
#if __GNUC__ >= 3
40
4.68k
#define likely(x) __builtin_expect(!!(x), 1)
41
4.68k
#define unlikely(x) __builtin_expect(!!(x), 0)
42
#else
43
#define likely(x) (x)
44
#define unlikely(x) (x)
45
#endif
46
47
#ifdef _MSC_VER
48
#define ALIGNED(n) _declspec(align(n))
49
#else
50
#define ALIGNED(n) __attribute__((aligned(n)))
51
#endif
52
53
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
54
55
#define CHECK_EOF()                                                                                                                \
56
8.89k
    if (buf == buf_end) {                                                                                                          \
57
0
        *ret = -2;                                                                                                                 \
58
0
        return NULL;                                                                                                               \
59
0
    }
60
61
#define EXPECT_CHAR_NO_CHECK(ch)                                                                                                   \
62
4.68k
    if (*buf++ != ch) {                                                                                                            \
63
0
        *ret = -1;                                                                                                                 \
64
0
        return NULL;                                                                                                               \
65
0
    }
66
67
#define EXPECT_CHAR(ch)                                                                                                            \
68
1.40k
    CHECK_EOF();                                                                                                                   \
69
1.40k
    EXPECT_CHAR_NO_CHECK(ch);
70
71
#define ADVANCE_TOKEN(tok, toklen)                                                                                                 \
72
0
    do {                                                                                                                           \
73
0
        const char *tok_start = buf;                                                                                               \
74
0
        static const char ALIGNED(16) ranges2[16] = "\000\040\177\177";                                                            \
75
0
        int found2;                                                                                                                \
76
0
        buf = findchar_fast(buf, buf_end, ranges2, 4, &found2);                                                                    \
77
0
        if (!found2) {                                                                                                             \
78
0
            CHECK_EOF();                                                                                                           \
79
0
        }                                                                                                                          \
80
0
        while (1) {                                                                                                                \
81
0
            if (*buf == ' ') {                                                                                                     \
82
0
                break;                                                                                                             \
83
0
            } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {                                                                      \
84
0
                if ((unsigned char)*buf < '\040' || *buf == '\177') {                                                              \
85
0
                    *ret = -1;                                                                                                     \
86
0
                    return NULL;                                                                                                   \
87
0
                }                                                                                                                  \
88
0
            }                                                                                                                      \
89
0
            ++buf;                                                                                                                 \
90
0
            CHECK_EOF();                                                                                                           \
91
0
        }                                                                                                                          \
92
0
        tok = tok_start;                                                                                                           \
93
0
        toklen = buf - tok_start;                                                                                                  \
94
0
    } while (0)
95
96
static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
97
                                    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
98
                                    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
99
                                    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
100
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
101
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
102
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
103
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
104
105
static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
106
468
{
107
468
    *found = 0;
108
#if __SSE4_2__
109
    if (likely(buf_end - buf >= 16)) {
110
        __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
111
112
        size_t left = (buf_end - buf) & ~15;
113
        do {
114
            __m128i b16 = _mm_loadu_si128((const __m128i *)buf);
115
            int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
116
            if (unlikely(r != 16)) {
117
                buf += r;
118
                *found = 1;
119
                break;
120
            }
121
            buf += 16;
122
            left -= 16;
123
        } while (likely(left != 0));
124
    }
125
#else
126
    /* suppress unused parameter warning */
127
468
    (void)buf_end;
128
468
    (void)ranges;
129
468
    (void)ranges_size;
130
468
#endif
131
468
    return buf;
132
468
}
133
134
static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
135
936
{
136
936
    const char *token_start = buf;
137
138
#ifdef __SSE4_2__
139
    static const char ALIGNED(16) ranges1[16] = "\0\010"    /* allow HT */
140
                                                "\012\037"  /* allow SP and up to but not including DEL */
141
                                                "\177\177"; /* allow chars w. MSB set */
142
    int found;
143
    buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
144
    if (found)
145
        goto FOUND_CTL;
146
#else
147
    /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
148
936
    while (likely(buf_end - buf >= 8)) {
149
936
#define DOIT()                                                                                                                     \
150
4.68k
    do {                                                                                                                           \
151
4.68k
        if (unlikely(!IS_PRINTABLE_ASCII(*buf)))                                                                                   \
152
4.68k
            goto NonPrintable;                                                                                                     \
153
4.68k
        ++buf;                                                                                                                     \
154
3.74k
    } while (0)
155
936
        DOIT();
156
936
        DOIT();
157
936
        DOIT();
158
936
        DOIT();
159
468
        DOIT();
160
468
        DOIT();
161
0
        DOIT();
162
0
        DOIT();
163
0
#undef DOIT
164
0
        continue;
165
936
    NonPrintable:
166
936
        if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
167
936
            goto FOUND_CTL;
168
936
        }
169
0
        ++buf;
170
0
    }
171
0
#endif
172
0
    for (;; ++buf) {
173
0
        CHECK_EOF();
174
0
        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
175
0
            if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
176
0
                goto FOUND_CTL;
177
0
            }
178
0
        }
179
0
    }
180
936
FOUND_CTL:
181
936
    if (likely(*buf == '\015')) {
182
936
        ++buf;
183
1.87k
        EXPECT_CHAR('\012');
184
1.87k
        *token_len = buf - 2 - token_start;
185
1.87k
    } else if (*buf == '\012') {
186
0
        *token_len = buf - token_start;
187
0
        ++buf;
188
0
    } else {
189
0
        *ret = -1;
190
0
        return NULL;
191
0
    }
192
936
    *token = token_start;
193
194
936
    return buf;
195
936
}
196
197
static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
198
0
{
199
0
    int ret_cnt = 0;
200
0
    buf = last_len < 3 ? buf : buf + last_len - 3;
201
202
0
    while (1) {
203
0
        CHECK_EOF();
204
0
        if (*buf == '\015') {
205
0
            ++buf;
206
0
            CHECK_EOF();
207
0
            EXPECT_CHAR('\012');
208
0
            ++ret_cnt;
209
0
        } else if (*buf == '\012') {
210
0
            ++buf;
211
0
            ++ret_cnt;
212
0
        } else {
213
0
            ++buf;
214
0
            ret_cnt = 0;
215
0
        }
216
0
        if (ret_cnt == 2) {
217
0
            return buf;
218
0
        }
219
0
    }
220
221
0
    *ret = -2;
222
0
    return NULL;
223
0
}
224
225
#define PARSE_INT(valp_, mul_)                                                                                                     \
226
1.87k
    if (*buf < '0' || '9' < *buf) {                                                                                                \
227
0
        buf++;                                                                                                                     \
228
0
        *ret = -1;                                                                                                                 \
229
0
        return NULL;                                                                                                               \
230
0
    }                                                                                                                              \
231
1.87k
    *(valp_) = (mul_) * (*buf++ - '0');
232
233
#define PARSE_INT_3(valp_)                                                                                                         \
234
468
    do {                                                                                                                           \
235
468
        int res_ = 0;                                                                                                              \
236
468
        PARSE_INT(&res_, 100)                                                                                                      \
237
468
        *valp_ = res_;                                                                                                             \
238
468
        PARSE_INT(&res_, 10)                                                                                                       \
239
468
        *valp_ += res_;                                                                                                            \
240
468
        PARSE_INT(&res_, 1)                                                                                                        \
241
468
        *valp_ += res_;                                                                                                            \
242
468
    } while (0)
243
244
/* returned pointer is always within [buf, buf_end), or null */
245
static const char *parse_token(const char *buf, const char *buf_end, const char **token, size_t *token_len, char next_char,
246
                               int *ret)
247
468
{
248
    /* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128
249
     * bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */
250
468
    static const char ALIGNED(16) ranges[] = "\x00 "  /* control chars and up to SP */
251
468
                                             "\"\""   /* 0x22 */
252
468
                                             "()"     /* 0x28,0x29 */
253
468
                                             ",,"     /* 0x2c */
254
468
                                             "//"     /* 0x2f */
255
468
                                             ":@"     /* 0x3a-0x40 */
256
468
                                             "[]"     /* 0x5b-0x5d */
257
468
                                             "{\xff"; /* 0x7b-0xff */
258
468
    const char *buf_start = buf;
259
468
    int found;
260
468
    buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &found);
261
468
    if (!found) {
262
468
        CHECK_EOF();
263
468
    }
264
5.14k
    while (1) {
265
5.14k
        if (*buf == next_char) {
266
468
            break;
267
4.68k
        } else if (!token_char_map[(unsigned char)*buf]) {
268
0
            *ret = -1;
269
0
            return NULL;
270
0
        }
271
4.68k
        ++buf;
272
4.68k
        CHECK_EOF();
273
4.68k
    }
274
468
    *token = buf_start;
275
468
    *token_len = buf - buf_start;
276
468
    return buf;
277
468
}
278
279
/* returned pointer is always within [buf, buf_end), or null */
280
static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
281
468
{
282
    /* we want at least [HTTP/1.<two chars>] to try to parse */
283
468
    if (buf_end - buf < 9) {
284
0
        *ret = -2;
285
0
        return NULL;
286
0
    }
287
468
    EXPECT_CHAR_NO_CHECK('H');
288
468
    EXPECT_CHAR_NO_CHECK('T');
289
468
    EXPECT_CHAR_NO_CHECK('T');
290
468
    EXPECT_CHAR_NO_CHECK('P');
291
468
    EXPECT_CHAR_NO_CHECK('/');
292
468
    EXPECT_CHAR_NO_CHECK('1');
293
468
    EXPECT_CHAR_NO_CHECK('.');
294
468
    PARSE_INT(minor_version, 1);
295
468
    return buf;
296
468
}
297
298
static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
299
                                 size_t max_headers, int *ret)
300
468
{
301
936
    for (;; ++*num_headers) {
302
936
        CHECK_EOF();
303
936
        if (*buf == '\015') {
304
468
            ++buf;
305
468
            EXPECT_CHAR('\012');
306
468
            break;
307
468
        } else if (*buf == '\012') {
308
0
            ++buf;
309
0
            break;
310
0
        }
311
468
        if (*num_headers == max_headers) {
312
0
            *ret = -1;
313
0
            return NULL;
314
0
        }
315
468
        if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
316
            /* parsing name, but do not discard SP before colon, see
317
             * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
318
468
            if ((buf = parse_token(buf, buf_end, &headers[*num_headers].name, &headers[*num_headers].name_len, ':', ret)) == NULL) {
319
0
                return NULL;
320
0
            }
321
468
            if (headers[*num_headers].name_len == 0) {
322
0
                *ret = -1;
323
0
                return NULL;
324
0
            }
325
468
            ++buf;
326
936
            for (;; ++buf) {
327
936
                CHECK_EOF();
328
936
                if (!(*buf == ' ' || *buf == '\t')) {
329
468
                    break;
330
468
                }
331
936
            }
332
468
        } else {
333
0
            headers[*num_headers].name = NULL;
334
0
            headers[*num_headers].name_len = 0;
335
0
        }
336
468
        const char *value;
337
468
        size_t value_len;
338
468
        if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) {
339
0
            return NULL;
340
0
        }
341
        /* remove trailing SPs and HTABs */
342
468
        const char *value_end = value + value_len;
343
468
        for (; value_end != value; --value_end) {
344
468
            const char c = *(value_end - 1);
345
468
            if (!(c == ' ' || c == '\t')) {
346
468
                break;
347
468
            }
348
468
        }
349
468
        headers[*num_headers].value = value;
350
468
        headers[*num_headers].value_len = value_end - value;
351
468
    }
352
468
    return buf;
353
468
}
354
355
static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
356
                                 size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
357
                                 size_t max_headers, int *ret)
358
0
{
359
    /* skip first empty line (some clients add CRLF after POST content) */
360
0
    CHECK_EOF();
361
0
    if (*buf == '\015') {
362
0
        ++buf;
363
0
        EXPECT_CHAR('\012');
364
0
    } else if (*buf == '\012') {
365
0
        ++buf;
366
0
    }
367
368
    /* parse request line */
369
0
    if ((buf = parse_token(buf, buf_end, method, method_len, ' ', ret)) == NULL) {
370
0
        return NULL;
371
0
    }
372
0
    do {
373
0
        ++buf;
374
0
        CHECK_EOF();
375
0
    } while (*buf == ' ');
376
0
    ADVANCE_TOKEN(*path, *path_len);
377
0
    do {
378
0
        ++buf;
379
0
        CHECK_EOF();
380
0
    } while (*buf == ' ');
381
0
    if (*method_len == 0 || *path_len == 0) {
382
0
        *ret = -1;
383
0
        return NULL;
384
0
    }
385
0
    if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
386
0
        return NULL;
387
0
    }
388
0
    if (*buf == '\015') {
389
0
        ++buf;
390
0
        EXPECT_CHAR('\012');
391
0
    } else if (*buf == '\012') {
392
0
        ++buf;
393
0
    } else {
394
0
        *ret = -1;
395
0
        return NULL;
396
0
    }
397
398
0
    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
399
0
}
400
401
int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
402
                      size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
403
0
{
404
0
    const char *buf = buf_start, *buf_end = buf_start + len;
405
0
    size_t max_headers = *num_headers;
406
0
    int r;
407
408
0
    *method = NULL;
409
0
    *method_len = 0;
410
0
    *path = NULL;
411
0
    *path_len = 0;
412
0
    *minor_version = -1;
413
0
    *num_headers = 0;
414
415
    /* if last_len != 0, check if the request is complete (a fast countermeasure
416
       againt slowloris */
417
0
    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
418
0
        return r;
419
0
    }
420
421
0
    if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
422
0
                             &r)) == NULL) {
423
0
        return r;
424
0
    }
425
426
0
    return (int)(buf - buf_start);
427
0
}
428
429
static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
430
                                  size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
431
468
{
432
    /* parse "HTTP/1.x" */
433
468
    if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
434
0
        return NULL;
435
0
    }
436
    /* skip space */
437
468
    if (*buf != ' ') {
438
0
        *ret = -1;
439
0
        return NULL;
440
0
    }
441
468
    do {
442
468
        ++buf;
443
468
        CHECK_EOF();
444
468
    } while (*buf == ' ');
445
    /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
446
468
    if (buf_end - buf < 4) {
447
0
        *ret = -2;
448
0
        return NULL;
449
0
    }
450
468
    PARSE_INT_3(status);
451
452
    /* get message including preceding space */
453
468
    if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
454
0
        return NULL;
455
0
    }
456
468
    if (*msg_len == 0) {
457
        /* ok */
458
468
    } else if (**msg == ' ') {
459
        /* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP
460
         * before running past the end of the given buffer. */
461
468
        do {
462
468
            ++*msg;
463
468
            --*msg_len;
464
468
        } while (**msg == ' ');
465
468
    } else {
466
        /* garbage found after status code */
467
0
        *ret = -1;
468
0
        return NULL;
469
0
    }
470
471
468
    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
472
468
}
473
474
int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
475
                       struct phr_header *headers, size_t *num_headers, size_t last_len)
476
468
{
477
468
    const char *buf = buf_start, *buf_end = buf + len;
478
468
    size_t max_headers = *num_headers;
479
468
    int r;
480
481
468
    *minor_version = -1;
482
468
    *status = 0;
483
468
    *msg = NULL;
484
468
    *msg_len = 0;
485
468
    *num_headers = 0;
486
487
    /* if last_len != 0, check if the response is complete (a fast countermeasure
488
       against slowloris */
489
468
    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
490
0
        return r;
491
0
    }
492
493
468
    if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
494
0
        return r;
495
0
    }
496
497
468
    return (int)(buf - buf_start);
498
468
}
499
500
int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
501
0
{
502
0
    const char *buf = buf_start, *buf_end = buf + len;
503
0
    size_t max_headers = *num_headers;
504
0
    int r;
505
506
0
    *num_headers = 0;
507
508
    /* if last_len != 0, check if the response is complete (a fast countermeasure
509
       against slowloris */
510
0
    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
511
0
        return r;
512
0
    }
513
514
0
    if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
515
0
        return r;
516
0
    }
517
518
0
    return (int)(buf - buf_start);
519
0
}
520
521
enum {
522
    CHUNKED_IN_CHUNK_SIZE,
523
    CHUNKED_IN_CHUNK_EXT,
524
    CHUNKED_IN_CHUNK_DATA,
525
    CHUNKED_IN_CHUNK_CRLF,
526
    CHUNKED_IN_TRAILERS_LINE_HEAD,
527
    CHUNKED_IN_TRAILERS_LINE_MIDDLE
528
};
529
530
static int decode_hex(int ch)
531
0
{
532
0
    if ('0' <= ch && ch <= '9') {
533
0
        return ch - '0';
534
0
    } else if ('A' <= ch && ch <= 'F') {
535
0
        return ch - 'A' + 0xa;
536
0
    } else if ('a' <= ch && ch <= 'f') {
537
0
        return ch - 'a' + 0xa;
538
0
    } else {
539
0
        return -1;
540
0
    }
541
0
}
542
543
ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
544
0
{
545
0
    size_t dst = 0, src = 0, bufsz = *_bufsz;
546
0
    ssize_t ret = -2; /* incomplete */
547
548
0
    decoder->_total_read += bufsz;
549
550
0
    while (1) {
551
0
        switch (decoder->_state) {
552
0
        case CHUNKED_IN_CHUNK_SIZE:
553
0
            for (;; ++src) {
554
0
                int v;
555
0
                if (src == bufsz)
556
0
                    goto Exit;
557
0
                if ((v = decode_hex(buf[src])) == -1) {
558
0
                    if (decoder->_hex_count == 0) {
559
0
                        ret = -1;
560
0
                        goto Exit;
561
0
                    }
562
                    /* the only characters that may appear after the chunk size are BWS, semicolon, or CRLF */
563
0
                    switch (buf[src]) {
564
0
                    case ' ':
565
0
                    case '\011':
566
0
                    case ';':
567
0
                    case '\012':
568
0
                    case '\015':
569
0
                        break;
570
0
                    default:
571
0
                        ret = -1;
572
0
                        goto Exit;
573
0
                    }
574
0
                    break;
575
0
                }
576
0
                if (decoder->_hex_count == sizeof(size_t) * 2) {
577
0
                    ret = -1;
578
0
                    goto Exit;
579
0
                }
580
0
                decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
581
0
                ++decoder->_hex_count;
582
0
            }
583
0
            decoder->_hex_count = 0;
584
0
            decoder->_state = CHUNKED_IN_CHUNK_EXT;
585
        /* fallthru */
586
0
        case CHUNKED_IN_CHUNK_EXT:
587
            /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
588
0
            for (;; ++src) {
589
0
                if (src == bufsz)
590
0
                    goto Exit;
591
0
                if (buf[src] == '\012')
592
0
                    break;
593
0
            }
594
0
            ++src;
595
0
            if (decoder->bytes_left_in_chunk == 0) {
596
0
                if (decoder->consume_trailer) {
597
0
                    decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
598
0
                    break;
599
0
                } else {
600
0
                    goto Complete;
601
0
                }
602
0
            }
603
0
            decoder->_state = CHUNKED_IN_CHUNK_DATA;
604
        /* fallthru */
605
0
        case CHUNKED_IN_CHUNK_DATA: {
606
0
            size_t avail = bufsz - src;
607
0
            if (avail < decoder->bytes_left_in_chunk) {
608
0
                if (dst != src)
609
0
                    memmove(buf + dst, buf + src, avail);
610
0
                src += avail;
611
0
                dst += avail;
612
0
                decoder->bytes_left_in_chunk -= avail;
613
0
                goto Exit;
614
0
            }
615
0
            if (dst != src)
616
0
                memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
617
0
            src += decoder->bytes_left_in_chunk;
618
0
            dst += decoder->bytes_left_in_chunk;
619
0
            decoder->bytes_left_in_chunk = 0;
620
0
            decoder->_state = CHUNKED_IN_CHUNK_CRLF;
621
0
        }
622
        /* fallthru */
623
0
        case CHUNKED_IN_CHUNK_CRLF:
624
0
            for (;; ++src) {
625
0
                if (src == bufsz)
626
0
                    goto Exit;
627
0
                if (buf[src] != '\015')
628
0
                    break;
629
0
            }
630
0
            if (buf[src] != '\012') {
631
0
                ret = -1;
632
0
                goto Exit;
633
0
            }
634
0
            ++src;
635
0
            decoder->_state = CHUNKED_IN_CHUNK_SIZE;
636
0
            break;
637
0
        case CHUNKED_IN_TRAILERS_LINE_HEAD:
638
0
            for (;; ++src) {
639
0
                if (src == bufsz)
640
0
                    goto Exit;
641
0
                if (buf[src] != '\015')
642
0
                    break;
643
0
            }
644
0
            if (buf[src++] == '\012')
645
0
                goto Complete;
646
0
            decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
647
        /* fallthru */
648
0
        case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
649
0
            for (;; ++src) {
650
0
                if (src == bufsz)
651
0
                    goto Exit;
652
0
                if (buf[src] == '\012')
653
0
                    break;
654
0
            }
655
0
            ++src;
656
0
            decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
657
0
            break;
658
0
        default:
659
0
            assert(!"decoder is corrupt");
660
0
        }
661
0
    }
662
663
0
Complete:
664
0
    ret = bufsz - src;
665
0
Exit:
666
0
    if (dst != src)
667
0
        memmove(buf + dst, buf + src, bufsz - src);
668
0
    *_bufsz = dst;
669
    /* if incomplete but the overhead of the chunked encoding is >=100KB and >80%, signal an error */
670
0
    if (ret == -2) {
671
0
        decoder->_total_overhead += bufsz - dst;
672
0
        if (decoder->_total_overhead >= 100 * 1024 && decoder->_total_read - decoder->_total_overhead < decoder->_total_read / 4)
673
0
            ret = -1;
674
0
    }
675
0
    return ret;
676
0
}
677
678
int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
679
0
{
680
0
    return decoder->_state == CHUNKED_IN_CHUNK_DATA;
681
0
}
682
683
#undef CHECK_EOF
684
#undef EXPECT_CHAR
685
#undef ADVANCE_TOKEN