/src/h2o/deps/picohttpparser/picohttpparser.c

Source (jump to first uncovered line)
/*
 * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
 *                         Shigeo Mitsunari
 *
 * The software is licensed under either the MIT License (below) or the Perl
 * license.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stddef.h>
#include <string.h>
#ifdef __SSE4_2__
#ifdef _MSC_VER
#include <nmmintrin.h>
#else
#include <x86intrin.h>
#endif
#endif
#include "picohttpparser.h"

/* Branch-prediction hints. When __GNUC__ is 3 or greater they expand to __builtin_expect, with `!!(x)` normalizing the
 * condition to 0 or 1; otherwise they simply evaluate to the condition itself. */
#if __GNUC__ >= 3
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif

/* ALIGNED(n): declaration specifier requesting n-byte alignment, spelled for MSVC or for GCC-style compilers. In this file
 * it is applied to the 16-byte range tables handed to findchar_fast. */
#ifdef _MSC_VER
#define ALIGNED(n) _declspec(align(n))
#else
#define ALIGNED(n) __attribute__((aligned(n)))
#endif

/* True for bytes 0x20 (SP) through 0x7e ('~'). Subtracting 040 (octal, = 0x20) in unsigned arithmetic wraps anything below SP
 * around to a large value, so the single comparison against 0137 (octal, = 95) checks both bounds at once. */
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)

/*
 * The three macros below are statement macros that rely on the enclosing function having `buf`, `buf_end` and `ret` in scope
 * and returning a pointer. They are NOT wrapped in do { } while (0), so they must not be used as the sole body of an
 * unbraced if/else.
 */

/* Bail out with *ret = -2 (input incomplete) when the read position has reached the end of the buffer. */
#define CHECK_EOF()                                                                                                                \
    if (buf == buf_end) {                                                                                                          \
        *ret = -2;                                                                                                                 \
        return NULL;                                                                                                               \
    }

/* Consume one byte and bail out with *ret = -1 (parse error) unless it equals `ch`. Performs no bounds check; the caller must
 * already know that `buf` is readable. */
#define EXPECT_CHAR_NO_CHECK(ch)                                                                                                   \
    if (*buf++ != ch) {                                                                                                            \
        *ret = -1;                                                                                                                 \
        return NULL;                                                                                                               \
    }

/* Bounds-checked variant: -2 if the buffer is exhausted, -1 if the next byte is not `ch`, otherwise the byte is consumed. */
#define EXPECT_CHAR(ch)                                                                                                            \
    CHECK_EOF();                                                                                                                   \
    EXPECT_CHAR_NO_CHECK(ch);

/*
 * Scans a space-terminated token starting at `buf` and stores its start in `tok` and its length in `toklen`; on completion
 * `buf` points at the terminating SP.
 *
 * findchar_fast is first used to jump ahead to the first byte in 0x00-0x20 or equal to 0x7f (when it can); the byte loop then
 * finishes the scan. Bytes below 0x20 and DEL (0x7f) make the enclosing function return NULL with *ret = -1, whereas bytes
 * with the high bit set are accepted as part of the token. Reaching buf_end before a SP yields *ret = -2.
 *
 * Like CHECK_EOF, this expects `buf`, `buf_end` and `ret` to be in scope in a pointer-returning function.
 */
#define ADVANCE_TOKEN(tok, toklen)                                                                                                 \
    do {                                                                                                                           \
        const char *tok_start = buf;                                                                                               \
        static const char ALIGNED(16) ranges2[16] = "\000\040\177\177";                                                            \
        int found2;                                                                                                                \
        buf = findchar_fast(buf, buf_end, ranges2, 4, &found2);                                                                    \
        if (!found2) {                                                                                                             \
            CHECK_EOF();                                                                                                           \
        }                                                                                                                          \
        while (1) {                                                                                                                \
            if (*buf == ' ') {                                                                                                     \
                break;                                                                                                             \
            } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {                                                                      \
                if ((unsigned char)*buf < '\040' || *buf == '\177') {                                                              \
                    *ret = -1;                                                                                                     \
                    return NULL;                                                                                                   \
                }                                                                                                                  \
            }                                                                                                                      \
            ++buf;                                                                                                                 \
            CHECK_EOF();                                                                                                           \
        }                                                                                                                          \
        tok = tok_start;                                                                                                           \
        toklen = buf - tok_start;                                                                                                  \
    } while (0)

/*
 * Lookup table indexed by a byte value (cast to unsigned char): a non-zero entry means the byte may appear inside a token as
 * scanned by parse_token. Each string literal below covers 32 consecutive byte values. The bytes marked \1 are the ASCII
 * letters and digits plus ! # $ % & ' * + - . ^ _ ` | ~ ; every other byte, including all values >= 0x80, is rejected.
 */
static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
                                    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
                                    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
                                    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";

/*
 * Accelerated search for the first byte in [buf, buf_end) that falls into one of the inclusive byte ranges given pairwise in
 * `ranges` (`ranges_size` is the number of bytes of `ranges` that are in use, i.e. twice the number of ranges).
 *
 * With SSE4.2 the input is examined 16 bytes at a time; a trailing part shorter than 16 bytes is NOT examined. Without SSE4.2
 * nothing is examined at all. The caller must therefore always continue with a byte-wise scan from the returned position.
 *
 * `*found` is set to 1 only if a matching byte was located, in which case the returned pointer addresses it. Otherwise
 * `*found` is 0 and the returned pointer is where the byte-wise scan has to resume (possibly `buf` itself or `buf_end`).
 *
 * In the SSE4.2 build a full 16 bytes are loaded from `ranges`, so the table must be at least that large.
 */
static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
{
    *found = 0;
#ifdef __SSE4_2__ /* same guard style as the include block and get_token_to_eol; also avoids -Wundef noise */
    if (likely(buf_end - buf >= 16)) {
        __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);

        /* number of bytes that can be consumed in whole 16-byte blocks */
        size_t left = (buf_end - buf) & ~15;
        do {
            __m128i b16 = _mm_loadu_si128((const __m128i *)buf);
            /* r is the index of the first byte of b16 that lies in any of the ranges, or 16 if there is none */
            int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
            if (unlikely(r != 16)) {
                buf += r;
                *found = 1;
                break;
            }
            buf += 16;
            left -= 16;
        } while (likely(left != 0));
    }
#else
    /* suppress unused parameter warning */
    (void)buf_end;
    (void)ranges;
    (void)ranges_size;
#endif
    return buf;
}

/*
 * Consumes the remainder of the current line.
 *
 * On success `*token` / `*token_len` describe the bytes from the initial `buf` up to (not including) the line terminator,
 * which may be CRLF or a bare LF, and the returned pointer addresses the first byte after the terminator.
 *
 * Returns NULL with *ret = -2 if the buffer ends before the line does, or with *ret = -1 if the line contains a control
 * character other than HT (DEL included) that is not a valid terminator, e.g. a CR that is not followed by LF.
 * Bytes with the high bit set are accepted.
 */
static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
{
    const char *token_start = buf;

#ifdef __SSE4_2__
    /* Each pair of bytes is an inclusive range that stops the scan (0x00-0x08, 0x0a-0x1f, 0x7f). The trailing comments name
     * the bytes that are deliberately left out of the ranges, i.e. allowed inside the line. */
    static const char ALIGNED(16) ranges1[16] = "\0\010"    /* allow HT */
                                                "\012\037"  /* allow SP and up to but not including DEL */
                                                "\177\177"; /* allow chars w. MSB set */
    int found;
    buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
    if (found)
        goto FOUND_CTL;
#else
    /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
    while (likely(buf_end - buf >= 8)) {
#define DOIT()                                                                                                                     \
    do {                                                                                                                           \
        if (unlikely(!IS_PRINTABLE_ASCII(*buf)))                                                                                   \
            goto NonPrintable;                                                                                                     \
        ++buf;                                                                                                                     \
    } while (0)
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
#undef DOIT
        continue;
    NonPrintable:
        /* a control character other than HT, or DEL, ends the scan; HT and high-bit bytes are skipped over */
        if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
            goto FOUND_CTL;
        }
        ++buf;
    }
#endif
    /* byte-wise scan of whatever the fast path above did not cover */
    for (;; ++buf) {
        CHECK_EOF();
        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
            if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
                goto FOUND_CTL;
            }
        }
    }
FOUND_CTL:
    /* `buf` addresses the control character that stopped the scan; only CRLF or a bare LF is a valid line ending */
    if (likely(*buf == '\015')) {
        ++buf;
        EXPECT_CHAR('\012');
        /* buf is now past CRLF, so step back over both bytes */
        *token_len = buf - 2 - token_start;
    } else if (*buf == '\012') {
        *token_len = buf - token_start;
        ++buf;
    } else {
        *ret = -1;
        return NULL;
    }
    *token = token_start;

    return buf;
}

/*
 * Quick check for whether the buffer contains the blank line that ends a header block, i.e. two consecutive line terminators
 * (each being CRLF or a bare LF).
 *
 * `last_len` is the amount of data already inspected by an earlier call; the scan resumes three bytes before that point so
 * that a terminator straddling the old/new boundary is still noticed.
 *
 * Returns a pointer just past the second terminator, or NULL with *ret = -2 when the buffer ends first, or NULL with
 * *ret = -1 when a CR is not followed by LF.
 */
static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
{
    int eol_run = 0; /* consecutive line terminators seen so far */

    if (last_len >= 3)
        buf += last_len - 3;

    for (;;) {
        CHECK_EOF();
        switch (*buf) {
        case '\015':
            ++buf;
            EXPECT_CHAR('\012');
            ++eol_run;
            break;
        case '\012':
            ++buf;
            ++eol_run;
            break;
        default:
            /* any other byte breaks the run */
            ++buf;
            eol_run = 0;
            break;
        }
        if (eol_run == 2)
            return buf;
    }
}

/*
 * Parses one decimal digit at `buf`, stores digit * mul_ through `valp_` and advances `buf` past it. A non-digit makes the
 * enclosing function return NULL with *ret = -1. No bounds check is performed; the caller must have verified that the byte
 * is readable. Note that the expansion is a bare if-statement plus an assignment that already ends in a semicolon, which is
 * what lets PARSE_INT_3 invoke it without one.
 */
#define PARSE_INT(valp_, mul_)                                                                                                     \
    if (*buf < '0' || '9' < *buf) {                                                                                                \
        buf++;                                                                                                                     \
        *ret = -1;                                                                                                                 \
        return NULL;                                                                                                               \
    }                                                                                                                              \
    *(valp_) = (mul_) * (*buf++ - '0');

/* Parses exactly three decimal digits at `buf` into `*valp_` (hundreds, tens, units), consuming them. Error behaviour and
 * the lack of bounds checking are those of PARSE_INT. */
#define PARSE_INT_3(valp_)                                                                                                         \
    do {                                                                                                                           \
        int res_ = 0;                                                                                                              \
        PARSE_INT(&res_, 100)                                                                                                      \
        *valp_ = res_;                                                                                                             \
        PARSE_INT(&res_, 10)                                                                                                       \
        *valp_ += res_;                                                                                                            \
        PARSE_INT(&res_, 1)                                                                                                        \
        *valp_ += res_;                                                                                                            \
    } while (0)

/*
 * Scans a token that must be terminated by `next_char`.
 *
 * On success `*token` / `*token_len` describe the token and the returned pointer addresses the terminating `next_char`, so it
 * is always within [buf, buf_end). Returns NULL with *ret = -1 if a byte that is neither `next_char` nor a valid token
 * character is met, or with *ret = -2 if the buffer ends first.
 */
static const char *parse_token(const char *buf, const char *buf_end, const char **token, size_t *token_len, char next_char,
                               int *ret)
{
    /* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128
     * bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */
    static const char ALIGNED(16) ranges[] = "\x00 "  /* control chars and up to SP */
                                             "\"\""   /* 0x22 */
                                             "()"     /* 0x28,0x29 */
                                             ",,"     /* 0x2c */
                                             "//"     /* 0x2f */
                                             ":@"     /* 0x3a-0x40 */
                                             "[]"     /* 0x5b-0x5d */
                                             "{\xff"; /* 0x7b-0xff */
    const char *const token_begin = buf;
    int hit;

    /* fast-forward to the first byte that might end the token */
    buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &hit);
    if (!hit) {
        CHECK_EOF();
    }

    /* authoritative byte-wise scan: stop at the terminator, reject anything that is not a token character */
    while (*buf != next_char) {
        if (!token_char_map[(unsigned char)*buf]) {
            *ret = -1;
            return NULL;
        }
        ++buf;
        CHECK_EOF();
    }

    *token = token_begin;
    *token_len = buf - token_begin;
    return buf;
}

/*
 * Parses "HTTP/1.<digit>" and stores the digit in `*minor_version`.
 *
 * The returned pointer addresses the byte following the version and is always within [buf, buf_end), or NULL. NULL is
 * returned with *ret = -2 if fewer than nine bytes are available, or with *ret = -1 if the text does not match.
 */
static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
{
    static const char prefix[] = "HTTP/1.";
    const size_t prefix_len = sizeof(prefix) - 1;

    /* we want the prefix plus two more bytes (the minor version digit and whatever follows it) before trying to parse */
    if (buf_end - buf < 9) {
        *ret = -2;
        return NULL;
    }
    if (memcmp(buf, prefix, prefix_len) != 0) {
        *ret = -1;
        return NULL;
    }
    buf += prefix_len;
    PARSE_INT(minor_version, 1);
    return buf;
}

/*
 * Parses header lines until the blank line that ends the header block.
 *
 * Entries are written to `headers` starting at index `*num_headers`, which is incremented for every line stored. A line
 * beginning with SP or HT (other than the very first one) is recorded as a continuation: its name is NULL with length 0.
 * Trailing SP / HT are stripped from every value.
 *
 * Returns a pointer past the terminating blank line, or NULL with *ret = -2 (input incomplete) or *ret = -1 (syntax error,
 * empty header name, or more than `max_headers` lines).
 */
static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
                                 size_t max_headers, int *ret)
{
    for (;; ++*num_headers) {
        struct phr_header *hdr;
        const char *value, *value_end;
        size_t value_len;

        CHECK_EOF();
        /* an empty line terminates the header block */
        if (*buf == '\015') {
            ++buf;
            EXPECT_CHAR('\012');
            break;
        }
        if (*buf == '\012') {
            ++buf;
            break;
        }

        if (*num_headers == max_headers) {
            *ret = -1;
            return NULL;
        }
        hdr = &headers[*num_headers];

        if (*num_headers != 0 && (*buf == ' ' || *buf == '\t')) {
            /* continuation of the previous header's value */
            hdr->name = NULL;
            hdr->name_len = 0;
        } else {
            /* parsing name, but do not discard SP before colon, see
             * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
            buf = parse_token(buf, buf_end, &hdr->name, &hdr->name_len, ':', ret);
            if (buf == NULL) {
                return NULL;
            }
            if (hdr->name_len == 0) {
                *ret = -1;
                return NULL;
            }
            /* step over the colon, then over any SP / HT in front of the value */
            ++buf;
            for (;;) {
                CHECK_EOF();
                if (*buf != ' ' && *buf != '\t') {
                    break;
                }
                ++buf;
            }
        }

        buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret);
        if (buf == NULL) {
            return NULL;
        }

        /* remove trailing SPs and HTABs */
        value_end = value + value_len;
        while (value_end != value && (value_end[-1] == ' ' || value_end[-1] == '\t')) {
            --value_end;
        }
        hdr->value = value;
        hdr->value_len = value_end - value;
    }
    return buf;
}

/*
 * Parses a request line ("<method> <path> HTTP/1.<minor>") followed by the header block.
 *
 * One leading empty line is tolerated. Method, path and minor version are reported through the respective out-parameters and
 * the headers through `headers` / `num_headers`. Returns what parse_headers returns on success, or NULL with *ret = -2
 * (input incomplete) or *ret = -1 (malformed request, including an empty method or path).
 */
static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
                                 size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
                                 size_t max_headers, int *ret)
{
    /* skip first empty line (some clients add CRLF after POST content) */
    CHECK_EOF();
    switch (*buf) {
    case '\015':
        ++buf;
        EXPECT_CHAR('\012');
        break;
    case '\012':
        ++buf;
        break;
    default:
        break;
    }

    /* method, terminated by SP */
    buf = parse_token(buf, buf_end, method, method_len, ' ', ret);
    if (buf == NULL) {
        return NULL;
    }
    /* step over the run of SPs separating method and path */
    for (++buf;; ++buf) {
        CHECK_EOF();
        if (*buf != ' ') {
            break;
        }
    }

    /* path, terminated by SP */
    ADVANCE_TOKEN(*path, *path_len);
    /* step over the run of SPs separating path and version */
    for (++buf;; ++buf) {
        CHECK_EOF();
        if (*buf != ' ') {
            break;
        }
    }

    if (*method_len == 0 || *path_len == 0) {
        *ret = -1;
        return NULL;
    }

    buf = parse_http_version(buf, buf_end, minor_version, ret);
    if (buf == NULL) {
        return NULL;
    }

    /* the request line must end right after the version */
    switch (*buf) {
    case '\015':
        ++buf;
        EXPECT_CHAR('\012');
        break;
    case '\012':
        ++buf;
        break;
    default:
        *ret = -1;
        return NULL;
    }

    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}

/*
 * Public entry point for parsing an HTTP request held in buf_start[0 .. len).
 *
 * On entry `*num_headers` is the capacity of `headers`; on return it is the number of entries filled in. All other
 * out-parameters are reset before parsing starts. `last_len` is the length that was passed on a previous, incomplete attempt
 * (0 if none).
 *
 * Returns the number of bytes consumed on success, -1 on a parse error, or -2 if the request is incomplete.
 */
int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
                      size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *const buf_end = buf_start + len;
    const size_t max_headers = *num_headers;
    const char *cur;
    int r;

    /* reset every output so that callers see defined values on failure */
    *method = NULL;
    *method_len = 0;
    *path = NULL;
    *path_len = 0;
    *minor_version = -1;
    *num_headers = 0;

    /* if last_len != 0, check if the request is complete (a fast countermeasure against slowloris) */
    if (last_len != 0) {
        if (is_complete(buf_start, buf_end, last_len, &r) == NULL) {
            return r;
        }
    }

    cur = parse_request(buf_start, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
                        &r);
    if (cur == NULL) {
        return r;
    }

    return (int)(cur - buf_start);
}

/*
 * Parses a status line ("HTTP/1.<minor> <3-digit status> <message>") followed by the header block.
 *
 * The message is reported without its leading spaces and may be empty. Returns what parse_headers returns on success, or NULL
 * with *ret = -2 (input incomplete) or *ret = -1 (malformed response).
 */
static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
                                  size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
{
    /* parse "HTTP/1.x" */
    buf = parse_http_version(buf, buf_end, minor_version, ret);
    if (buf == NULL) {
        return NULL;
    }

    /* at least one SP has to follow the version; skip the whole run */
    if (*buf != ' ') {
        *ret = -1;
        return NULL;
    }
    for (++buf;; ++buf) {
        CHECK_EOF();
        if (*buf != ' ') {
            break;
        }
    }

    /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
    if (buf_end - buf < 4) {
        *ret = -2;
        return NULL;
    }
    PARSE_INT_3(status);

    /* get message including preceding space */
    buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret);
    if (buf == NULL) {
        return NULL;
    }
    if (*msg_len != 0) {
        if (**msg != ' ') {
            /* garbage found after status code */
            *ret = -1;
            return NULL;
        }
        /* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than
         * SP before running past the end of the given buffer. */
        do {
            ++*msg;
            --*msg_len;
        } while (**msg == ' ');
    }

    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}

/*
 * Public entry point for parsing an HTTP response held in buf_start[0 .. len).
 *
 * On entry `*num_headers` is the capacity of `headers`; on return it is the number of entries filled in. All other
 * out-parameters are reset before parsing starts. `last_len` is the length that was passed on a previous, incomplete attempt
 * (0 if none).
 *
 * Returns the number of bytes consumed on success, -1 on a parse error, or -2 if the response is incomplete.
 */
int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
                       struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *const buf_end = buf_start + len;
    const size_t max_headers = *num_headers;
    const char *cur;
    int r;

    /* reset every output so that callers see defined values on failure */
    *minor_version = -1;
    *status = 0;
    *msg = NULL;
    *msg_len = 0;
    *num_headers = 0;

    /* if last_len != 0, check if the response is complete (a fast countermeasure against slowloris) */
    if (last_len != 0) {
        if (is_complete(buf_start, buf_end, last_len, &r) == NULL) {
            return r;
        }
    }

    cur = parse_response(buf_start, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r);
    if (cur == NULL) {
        return r;
    }

    return (int)(cur - buf_start);
}

/*
 * Public entry point for parsing a bare header block held in buf_start[0 .. len).
 *
 * On entry `*num_headers` is the capacity of `headers`; on return it is the number of entries filled in. `last_len` is the
 * length that was passed on a previous, incomplete attempt (0 if none).
 *
 * Returns the number of bytes consumed on success, -1 on a parse error, or -2 if the header block is incomplete.
 */
int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *const buf_end = buf_start + len;
    const size_t max_headers = *num_headers;
    const char *cur;
    int r;

    *num_headers = 0;

    /* if last_len != 0, check if the header block is complete (a fast countermeasure against slowloris) */
    if (last_len != 0) {
        if (is_complete(buf_start, buf_end, last_len, &r) == NULL) {
            return r;
        }
    }

    cur = parse_headers(buf_start, buf_end, headers, num_headers, max_headers, &r);
    if (cur == NULL) {
        return r;
    }

    return (int)(cur - buf_start);
}

/* States of the chunked decoder, stored in the decoder's `_state` field by phr_decode_chunked. */
enum {
    CHUNKED_IN_CHUNK_SIZE = 0,           /* reading the hexadecimal chunk size */
    CHUNKED_IN_CHUNK_EXT = 1,            /* skipping the rest of the chunk-size line */
    CHUNKED_IN_CHUNK_DATA = 2,           /* copying chunk payload */
    CHUNKED_IN_CHUNK_CRLF = 3,           /* expecting the line ending that follows the payload */
    CHUNKED_IN_TRAILERS_LINE_HEAD = 4,   /* at the start of a trailer line */
    CHUNKED_IN_TRAILERS_LINE_MIDDLE = 5  /* inside a trailer line */
};

/* Returns the numeric value (0-15) of the hexadecimal digit `ch`, accepting both letter cases, or -1 if `ch` is not a
 * hexadecimal digit. */
static int decode_hex(int ch)
{
    if (ch >= '0' && ch <= '9')
        return ch - '0';
    if (ch >= 'a' && ch <= 'f')
        return 0xa + (ch - 'a');
    if (ch >= 'A' && ch <= 'F')
        return 0xa + (ch - 'A');
    return -1;
}

/*
 * Decodes chunked transfer-encoded data in place.
 *
 * On entry `buf` holds `*_bufsz` bytes of encoded input. Payload bytes are compacted towards the front of `buf`, and on
 * return `*_bufsz` is the number of decoded bytes now at the start of `buf`. Parsing state is kept in `decoder` across calls,
 * so the input may be fed piecewise.
 *
 * Returns:
 *   >= 0  the end of the chunked data was reached; the value is the number of undecoded bytes that followed it (they are
 *         moved to buf + *_bufsz)
 *   -2    more input is required
 *   -1    the input is malformed, the decoder state is invalid, or the encoding overhead has become excessive
 */
ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
{
    /* `src` is the read position, `dst` the write position of decoded payload; dst <= src always holds */
    size_t dst = 0, src = 0, bufsz = *_bufsz;
    ssize_t ret = -2; /* incomplete */

    decoder->_total_read += bufsz;

    while (1) {
        switch (decoder->_state) {
        case CHUNKED_IN_CHUNK_SIZE:
            for (;; ++src) {
                int v;
                if (src == bufsz)
                    goto Exit;
                if ((v = decode_hex(buf[src])) == -1) {
                    /* a chunk size needs at least one hex digit */
                    if (decoder->_hex_count == 0) {
                        ret = -1;
                        goto Exit;
                    }
                    /* the only characters that may appear after the chunk size are BWS, semicolon, or CRLF */
                    switch (buf[src]) {
                    case ' ':
                    case '\011':
                    case ';':
                    case '\012':
                    case '\015':
                        break;
                    default:
                        ret = -1;
                        goto Exit;
                    }
                    break;
                }
                /* refuse more hex digits than a size_t can hold, so the accumulation below cannot overflow */
                if (decoder->_hex_count == sizeof(size_t) * 2) {
                    ret = -1;
                    goto Exit;
                }
                decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
                ++decoder->_hex_count;
            }
            decoder->_hex_count = 0;
            decoder->_state = CHUNKED_IN_CHUNK_EXT;
        /* fallthru */
        case CHUNKED_IN_CHUNK_EXT:
            /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
            for (;; ++src) {
                if (src == bufsz)
                    goto Exit;
                if (buf[src] == '\012')
                    break;
            }
            ++src;
            /* a zero-sized chunk is the last one */
            if (decoder->bytes_left_in_chunk == 0) {
                if (decoder->consume_trailer) {
                    decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
                    break;
                } else {
                    goto Complete;
                }
            }
            decoder->_state = CHUNKED_IN_CHUNK_DATA;
        /* fallthru */
        case CHUNKED_IN_CHUNK_DATA: {
            size_t avail = bufsz - src;
            if (avail < decoder->bytes_left_in_chunk) {
                /* the chunk continues beyond this buffer: take everything that is available and wait for more */
                if (dst != src)
                    memmove(buf + dst, buf + src, avail);
                src += avail;
                dst += avail;
                decoder->bytes_left_in_chunk -= avail;
                goto Exit;
            }
            if (dst != src)
                memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
            src += decoder->bytes_left_in_chunk;
            dst += decoder->bytes_left_in_chunk;
            decoder->bytes_left_in_chunk = 0;
            decoder->_state = CHUNKED_IN_CHUNK_CRLF;
        }
        /* fallthru */
        case CHUNKED_IN_CHUNK_CRLF:
            /* skip any CRs, then require LF */
            for (;; ++src) {
                if (src == bufsz)
                    goto Exit;
                if (buf[src] != '\015')
                    break;
            }
            if (buf[src] != '\012') {
                ret = -1;
                goto Exit;
            }
            ++src;
            decoder->_state = CHUNKED_IN_CHUNK_SIZE;
            break;
        case CHUNKED_IN_TRAILERS_LINE_HEAD:
            for (;; ++src) {
                if (src == bufsz)
                    goto Exit;
                if (buf[src] != '\015')
                    break;
            }
            /* an empty line ends the trailers */
            if (buf[src++] == '\012')
                goto Complete;
            decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
        /* fallthru */
        case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
            for (;; ++src) {
                if (src == bufsz)
                    goto Exit;
                if (buf[src] == '\012')
                    break;
            }
            ++src;
            decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
            break;
        default:
            /* Unknown state (e.g. a decoder that was never initialized). Trap in debug builds; when assertions are compiled
             * out, report an error instead of spinning in this loop forever. */
            assert(!"decoder is corrupt");
            ret = -1;
            goto Exit;
        }
    }

Complete:
    ret = bufsz - src;
Exit:
    /* move whatever has not been consumed so that it directly follows the decoded payload */
    if (dst != src)
        memmove(buf + dst, buf + src, bufsz - src);
    *_bufsz = dst;
    /* if incomplete but the overhead of the chunked encoding is >=100KB and the payload amounts to less than a quarter of all
     * bytes read so far, signal an error */
    if (ret == -2) {
        decoder->_total_overhead += bufsz - dst;
        if (decoder->_total_overhead >= 100 * 1024 && decoder->_total_read - decoder->_total_overhead < decoder->_total_read / 4)
            ret = -1;
    }
    return ret;
}

/* Returns non-zero if the decoder's current state is CHUNKED_IN_CHUNK_DATA, i.e. it is in the middle of a chunk's payload;
 * returns 0 otherwise. */
int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
{
    const int in_data = (decoder->_state == CHUNKED_IN_CHUNK_DATA);

    return in_data;
}

/* Retire the parser helper macros that depend on the local `buf` / `buf_end` / `ret` naming convention. */
#undef CHECK_EOF
#undef EXPECT_CHAR
#undef ADVANCE_TOKEN

Coverage Report

Created: 2024-09-11 06:26

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
3		* Shigeo Mitsunari
4		*
5		* The software is licensed under either the MIT License (below) or the Perl
6		* license.
7		*
8		* Permission is hereby granted, free of charge, to any person obtaining a copy
9		* of this software and associated documentation files (the "Software"), to
10		* deal in the Software without restriction, including without limitation the
11		* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
12		* sell copies of the Software, and to permit persons to whom the Software is
13		* furnished to do so, subject to the following conditions:
14		*
15		* The above copyright notice and this permission notice shall be included in
16		* all copies or substantial portions of the Software.
17		*
18		* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19		* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20		* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21		* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22		* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23		* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24		* IN THE SOFTWARE.
25		*/
26
27		#include <assert.h>
28		#include <stddef.h>
29		#include <string.h>
30		#ifdef __SSE4_2__
31		#ifdef _MSC_VER
32		#include <nmmintrin.h>
33		#else
34		#include <x86intrin.h>
35		#endif
36		#endif
37		#include "picohttpparser.h"
38
39		#if __GNUC__ >= 3
40	643k	#define likely(x) __builtin_expect(!!(x), 1)
41	1.32M	#define unlikely(x) __builtin_expect(!!(x), 0)
42		#else
43		#define likely(x) (x)
44		#define unlikely(x) (x)
45		#endif
46
47		#ifdef _MSC_VER
48		#define ALIGNED(n) _declspec(align(n))
49		#else
50		#define ALIGNED(n) __attribute__((aligned(n)))
51		#endif
52
53		#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
54
55		#define CHECK_EOF() \
56	1.90M	if (buf == buf_end) { \
57	1.81k	*ret = -2; \
58	1.81k	return NULL; \
59	1.81k	}
60
61		#define EXPECT_CHAR_NO_CHECK(ch) \
62	125k	if (*buf++ != ch) { \
63	4.51k	*ret = -1; \
64	4.51k	return NULL; \
65	4.51k	}
66
67		#define EXPECT_CHAR(ch) \
68	14.5k	CHECK_EOF(); \
69	14.4k	EXPECT_CHAR_NO_CHECK(ch);
70
71		#define ADVANCE_TOKEN(tok, toklen) \
72	14.5k	do { \
73	14.5k	const char *tok_start = buf; \
74	14.5k	static const char ALIGNED(16) ranges2[16] = "\000\040\177\177"; \
75	14.5k	int found2; \
76	14.5k	buf = findchar_fast(buf, buf_end, ranges2, 4, &found2); \
77	14.5k	if (!found2) { \
78	14.5k	CHECK_EOF(); \
79	14.5k	} \
80	743k	while (1) { \
81	743k	if (*buf == ' ') { \
82	14.2k	break; \
83	728k	} else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
84	352k	if ((unsigned char)buf < '\040' \|\| buf == '\177') { \
85	133	*ret = -1; \
86	133	return NULL; \
87	133	} \
88	352k	} \
89	743k	++buf; \
90	728k	CHECK_EOF(); \
91	728k	} \
92	14.5k	tok = tok_start; \
93	14.2k	toklen = buf - tok_start; \
94	14.2k	} while (0)
95
96		static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
97		"\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
98		"\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
99		"\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
100		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
101		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
102		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
103		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
104
105		static const char findchar_fast(const char buf, const char buf_end, const char ranges, size_t ranges_size, int *found)
106	78.3k	{
107	78.3k	*found = 0;
108		#if __SSE4_2__
109		if (likely(buf_end - buf >= 16)) {
110		__m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
111
112		size_t left = (buf_end - buf) & ~15;
113		do {
114		__m128i b16 = _mm_loadu_si128((const __m128i *)buf);
115		int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT \| _SIDD_CMP_RANGES \| _SIDD_UBYTE_OPS);
116		if (unlikely(r != 16)) {
117		buf += r;
118		*found = 1;
119		break;
120		}
121		buf += 16;
122		left -= 16;
123		} while (likely(left != 0));
124		}
125		#else
126		/* suppress unused parameter warning */
127	78.3k	(void)buf_end;
128	78.3k	(void)ranges;
129	78.3k	(void)ranges_size;
130	78.3k	#endif
131	78.3k	return buf;
132	78.3k	}
133
134		static const char get_token_to_eol(const char buf, const char buf_end, const char token, size_t token_len, int *ret)
135	52.5k	{
136	52.5k	const char *token_start = buf;
137
138		#ifdef __SSE4_2__
139		static const char ALIGNED(16) ranges1[16] = "\0\010" /* allow HT */
140		"\012\037" /* allow SP and up to but not including DEL */
141		"\177\177"; /* allow chars w. MSB set */
142		int found;
143		buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
144		if (found)
145		goto FOUND_CTL;
146		#else
147		/* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
148	192k	while (likely(buf_end - buf >= 8)) {
149	188k	#define DOIT() \
150	464k	do { \
151	464k	if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
152	464k	goto NonPrintable; \
153	464k	++buf; \
154	297k	} while (0)
155	188k	DOIT();
156	58.4k	DOIT();
157	51.4k	DOIT();
158	44.4k	DOIT();
159	36.3k	DOIT();
160	33.8k	DOIT();
161	28.1k	DOIT();
162	23.6k	DOIT();
163	21.0k	#undef DOIT
164	21.0k	continue;
165	167k	NonPrintable:
166	167k	if ((likely((unsigned char)buf < '\040') && likely(buf != '\011')) \|\| unlikely(*buf == '\177')) {
167	48.2k	goto FOUND_CTL;
168	48.2k	}
169	118k	++buf;
170	118k	}
171	4.30k	#endif
172	7.81k	for (;; ++buf) {
173	7.81k	CHECK_EOF();
174	7.47k	if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
175	5.04k	if ((likely((unsigned char)buf < '\040') && likely(buf != '\011')) \|\| unlikely(*buf == '\177')) {
176	3.96k	goto FOUND_CTL;
177	3.96k	}
178	5.04k	}
179	7.47k	}
180	52.2k	FOUND_CTL:
181	52.2k	if (likely(*buf == '\015')) {
182	8.94k	++buf;
183	17.8k	EXPECT_CHAR('\012');
184	17.8k	*token_len = buf - 2 - token_start;
185	43.3k	} else if (*buf == '\012') {
186	43.2k	*token_len = buf - token_start;
187	43.2k	++buf;
188	43.2k	} else {
189	76	*ret = -1;
190	76	return NULL;
191	76	}
192	52.1k	*token = token_start;
193
194	52.1k	return buf;
195	52.2k	}
196
197		static const char is_complete(const char buf, const char buf_end, size_t last_len, int ret)
198	1.44k	{
199	1.44k	int ret_cnt = 0;
200	1.44k	buf = last_len < 3 ? buf : buf + last_len - 3;
201
202	200k	while (1) {
203	200k	CHECK_EOF();
204	200k	if (*buf == '\015') {
205	956	++buf;
206	956	CHECK_EOF();
207	1.82k	EXPECT_CHAR('\012');
208	1.82k	++ret_cnt;
209	199k	} else if (*buf == '\012') {
210	5.45k	++buf;
211	5.45k	++ret_cnt;
212	193k	} else {
213	193k	++buf;
214	193k	ret_cnt = 0;
215	193k	}
216	200k	if (ret_cnt == 2) {
217	1.17k	return buf;
218	1.17k	}
219	200k	}
220
221	0	*ret = -2;
222	0	return NULL;
223	1.44k	}
224
225		#define PARSE_INT(valp_, mul_) \
226	20.7k	if (buf < '0' \|\| '9' < buf) { \
227	8	buf++; \
228	8	*ret = -1; \
229	8	return NULL; \
230	8	} \
231	20.7k	(valp_) = (mul_) (*buf++ - '0');
232
233		#define PARSE_INT_3(valp_) \
234	2.87k	do { \
235	2.87k	int res_ = 0; \
236	2.87k	PARSE_INT(&res_, 100) \
237	2.87k	*valp_ = res_; \
238	2.87k	PARSE_INT(&res_, 10) \
239	2.87k	*valp_ += res_; \
240	2.87k	PARSE_INT(&res_, 1) \
241	2.87k	*valp_ += res_; \
242	2.87k	} while (0)
243
244		/* returned pointer is always within [buf, buf_end), or null */
245		static const char parse_token(const char buf, const char buf_end, const char token, size_t token_len, char next_char,
246		int *ret)
247	63.8k	{
248		/* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128
249		* bits that is the size of a SSE register). Due to this restriction, characters `\|` and `~` are handled in the slow loop. */
250	63.8k	static const char ALIGNED(16) ranges[] = "\x00 " /* control chars and up to SP */
251	63.8k	"\"\"" /* 0x22 */
252	63.8k	"()" /* 0x28,0x29 */
253	63.8k	",," /* 0x2c */
254	63.8k	"//" /* 0x2f */
255	63.8k	":@" /* 0x3a-0x40 */
256	63.8k	"[]" /* 0x5b-0x5d */
257	63.8k	"{\xff"; /* 0x7b-0xff */
258	63.8k	const char *buf_start = buf;
259	63.8k	int found;
260	63.8k	buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &found);
261	63.8k	if (!found) {
262	63.8k	CHECK_EOF();
263	63.5k	}
264	774k	while (1) {
265	774k	if (*buf == next_char) {
266	62.9k	break;
267	711k	} else if (!token_char_map[(unsigned char)*buf]) {
268	315	*ret = -1;
269	315	return NULL;
270	315	}
271	710k	++buf;
272	710k	CHECK_EOF();
273	710k	}
274	62.9k	*token = buf_start;
275	62.9k	*token_len = buf - buf_start;
276	62.9k	return buf;
277	63.5k	}
278
279		/* returned pointer is always within [buf, buf_end), or null */
280		static const char parse_http_version(const char buf, const char buf_end, int minor_version, int *ret)
281	16.7k	{
282		/* we want at least [HTTP/1.<two chars>] to try to parse */
283	16.7k	if (buf_end - buf < 9) {
284	111	*ret = -2;
285	111	return NULL;
286	111	}
287	16.6k	EXPECT_CHAR_NO_CHECK('H');
288	16.5k	EXPECT_CHAR_NO_CHECK('T');
289	16.5k	EXPECT_CHAR_NO_CHECK('T');
290	16.5k	EXPECT_CHAR_NO_CHECK('P');
291	16.5k	EXPECT_CHAR_NO_CHECK('/');
292	16.5k	EXPECT_CHAR_NO_CHECK('1');
293	12.1k	EXPECT_CHAR_NO_CHECK('.');
294	12.1k	PARSE_INT(minor_version, 1);
295	12.1k	return buf;
296	12.1k	}
297
298		static const char parse_headers(const char buf, const char buf_end, struct phr_header headers, size_t *num_headers,
299		size_t max_headers, int *ret)
300	12.1k	{
301	61.3k	for (;; ++*num_headers) {
302	61.3k	CHECK_EOF();
303	61.1k	if (*buf == '\015') {
304	3.39k	++buf;
305	3.39k	EXPECT_CHAR('\012');
306	3.35k	break;
307	57.7k	} else if (*buf == '\012') {
308	7.82k	++buf;
309	7.82k	break;
310	7.82k	}
311	49.9k	if (*num_headers == max_headers) {
312	1	*ret = -1;
313	1	return NULL;
314	1	}
315	49.9k	if (!(num_headers != 0 && (buf == ' ' \|\| *buf == '\t'))) {
316		/* parsing name, but do not discard SP before colon, see
317		* http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
318	48.4k	if ((buf = parse_token(buf, buf_end, &headers[num_headers].name, &headers[num_headers].name_len, ':', ret)) == NULL) {
319	162	return NULL;
320	162	}
321	48.3k	if (headers[*num_headers].name_len == 0) {
322	3	*ret = -1;
323	3	return NULL;
324	3	}
325	48.3k	++buf;
326	55.0k	for (;; ++buf) {
327	55.0k	CHECK_EOF();
328	55.0k	if (!(buf == ' ' \|\| buf == '\t')) {
329	48.2k	break;
330	48.2k	}
331	55.0k	}
332	48.3k	} else {
333	1.47k	headers[*num_headers].name = NULL;
334	1.47k	headers[*num_headers].name_len = 0;
335	1.47k	}
336	49.7k	const char *value;
337	49.7k	size_t value_len;
338	49.7k	if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) {
339	437	return NULL;
340	437	}
341		/* remove trailing SPs and HTABs */
342	49.2k	const char *value_end = value + value_len;
343	51.6k	for (; value_end != value; --value_end) {
344	18.8k	const char c = *(value_end - 1);
345	18.8k	if (!(c == ' ' \|\| c == '\t')) {
346	16.4k	break;
347	16.4k	}
348	18.8k	}
349	49.2k	headers[*num_headers].value = value;
350	49.2k	headers[*num_headers].value_len = value_end - value;
351	49.2k	}
352	11.1k	return buf;
353	12.1k	}
354
355		static const char parse_request(const char buf, const char buf_end, const char method, size_t method_len, const char **path,
356		size_t path_len, int minor_version, struct phr_header headers, size_t num_headers,
357		size_t max_headers, int *ret)
358	15.3k	{
359		/* skip first empty line (some clients add CRLF after POST content) */
360	15.3k	CHECK_EOF();
361	15.3k	if (*buf == '\015') {
362	99	++buf;
363	120	EXPECT_CHAR('\012');
364	15.2k	} else if (*buf == '\012') {
365	1.24k	++buf;
366	1.24k	}
367
368		/* parse request line */
369	15.3k	if ((buf = parse_token(buf, buf_end, method, method_len, ' ', ret)) == NULL) {
370	751	return NULL;
371	751	}
372	14.8k	do {
373	14.8k	++buf;
374	14.8k	CHECK_EOF();
375	14.7k	} while (*buf == ' ');
376	14.5k	ADVANCE_TOKEN(path, path_len);
377	14.4k	do {
378	14.4k	++buf;
379	14.4k	CHECK_EOF();
380	14.4k	} while (*buf == ' ');
381	14.2k	if (method_len == 0 \|\| path_len == 0) {
382	356	*ret = -1;
383	356	return NULL;
384	356	}
385	13.8k	if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
386	4.59k	return NULL;
387	4.59k	}
388	9.26k	if (*buf == '\015') {
389	1.19k	++buf;
390	2.36k	EXPECT_CHAR('\012');
391	8.07k	} else if (*buf == '\012') {
392	8.05k	++buf;
393	8.05k	} else {
394	23	*ret = -1;
395	23	return NULL;
396	23	}
397
398	9.23k	return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
399	9.26k	}
400
401		int phr_parse_request(const char buf_start, size_t len, const char method, size_t method_len, const char **path,
402		size_t path_len, int minor_version, struct phr_header headers, size_t num_headers, size_t last_len)
403	15.6k	{
404	15.6k	const char buf = buf_start, buf_end = buf_start + len;
405	15.6k	size_t max_headers = *num_headers;
406	15.6k	int r;
407
408	15.6k	*method = NULL;
409	15.6k	*method_len = 0;
410	15.6k	*path = NULL;
411	15.6k	*path_len = 0;
412	15.6k	*minor_version = -1;
413	15.6k	*num_headers = 0;
414
415		/* if last_len != 0, check if the request is complete (a fast countermeasure
416		against slowloris */
417	15.6k	if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
418	268	return r;
419	268	}
420
421	15.3k	if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
422	15.3k	&r)) == NULL) {
423	7.08k	return r;
424	7.08k	}
425
426	8.30k	return (int)(buf - buf_start);
427	15.3k	}
428
429		static const char parse_response(const char buf, const char buf_end, int minor_version, int status, const char *msg,
430		size_t msg_len, struct phr_header headers, size_t num_headers, size_t max_headers, int ret)
431	2.87k	{
432		/* parse "HTTP/1.x" */
433	2.87k	if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
434	0	return NULL;
435	0	}
436		/* skip space */
437	2.87k	if (*buf != ' ') {
438	0	*ret = -1;
439	0	return NULL;
440	0	}
441	2.87k	do {
442	2.87k	++buf;
443	2.87k	CHECK_EOF();
444	2.87k	} while (*buf == ' ');
445		/* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
446	2.87k	if (buf_end - buf < 4) {
447	0	*ret = -2;
448	0	return NULL;
449	0	}
450	2.87k	PARSE_INT_3(status);
451
452		/* get message including preceding space */
453	2.87k	if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
454	0	return NULL;
455	0	}
456	2.87k	if (*msg_len == 0) {
457		/* ok */
458	2.87k	} else if (**msg == ' ') {
459		/* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP
460		* before running past the end of the given buffer. */
461	2.87k	do {
462	2.87k	++*msg;
463	2.87k	--*msg_len;
464	2.87k	} while (**msg == ' ');
465	2.87k	} else {
466		/* garbage found after status code */
467	0	*ret = -1;
468	0	return NULL;
469	0	}
470
471	2.87k	return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
472	2.87k	}
473
474		int phr_parse_response(const char buf_start, size_t len, int minor_version, int status, const char msg, size_t msg_len,
475		struct phr_header headers, size_t num_headers, size_t last_len)
476	2.87k	{
477	2.87k	const char buf = buf_start, buf_end = buf + len;
478	2.87k	size_t max_headers = *num_headers;
479	2.87k	int r;
480
481	2.87k	*minor_version = -1;
482	2.87k	*status = 0;
483	2.87k	*msg = NULL;
484	2.87k	*msg_len = 0;
485	2.87k	*num_headers = 0;
486
487		/* if last_len != 0, check if the response is complete (a fast countermeasure
488		against slowloris */
489	2.87k	if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
490	0	return r;
491	0	}
492
493	2.87k	if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
494	0	return r;
495	0	}
496
497	2.87k	return (int)(buf - buf_start);
498	2.87k	}
499
500		int phr_parse_headers(const char buf_start, size_t len, struct phr_header headers, size_t *num_headers, size_t last_len)
501	0	{
502	0	const char buf = buf_start, buf_end = buf + len;
503	0	size_t max_headers = *num_headers;
504	0	int r;
505
506	0	*num_headers = 0;
507
508		/* if last_len != 0, check if the response is complete (a fast countermeasure
509		against slowloris */
510	0	if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
511	0	return r;
512	0	}
513
514	0	if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
515	0	return r;
516	0	}
517
518	0	return (int)(buf - buf_start);
519	0	}
520
521		enum {
522		CHUNKED_IN_CHUNK_SIZE,
523		CHUNKED_IN_CHUNK_EXT,
524		CHUNKED_IN_CHUNK_DATA,
525		CHUNKED_IN_CHUNK_CRLF,
526		CHUNKED_IN_TRAILERS_LINE_HEAD,
527		CHUNKED_IN_TRAILERS_LINE_MIDDLE
528		};
529
530		static int decode_hex(int ch)
531	5.65k	{
532	5.65k	if ('0' <= ch && ch <= '9') {
533	1.66k	return ch - '0';
534	3.98k	} else if ('A' <= ch && ch <= 'F') {
535	981	return ch - 'A' + 0xa;
536	3.00k	} else if ('a' <= ch && ch <= 'f') {
537	866	return ch - 'a' + 0xa;
538	2.13k	} else {
539	2.13k	return -1;
540	2.13k	}
541	5.65k	}
542
543		ssize_t phr_decode_chunked(struct phr_chunked_decoder decoder, char buf, size_t *_bufsz)
544	1.11k	{
545	1.11k	size_t dst = 0, src = 0, bufsz = *_bufsz;
546	1.11k	ssize_t ret = -2; /* incomplete */
547
548	1.11k	decoder->_total_read += bufsz;
549
550	4.82k	while (1) {
551	4.82k	switch (decoder->_state) {
552	2.23k	case CHUNKED_IN_CHUNK_SIZE:
553	5.74k	for (;; ++src) {
554	5.74k	int v;
555	5.74k	if (src == bufsz)
556	94	goto Exit;
557	5.65k	if ((v = decode_hex(buf[src])) == -1) {
558	2.13k	if (decoder->_hex_count == 0) {
559	18	ret = -1;
560	18	goto Exit;
561	18	}
562		/* the only characters that may appear after the chunk size are BWS, semicolon, or CRLF */
563	2.11k	switch (buf[src]) {
564	539	case ' ':
565	560	case '\011':
566	632	case ';':
567	1.87k	case '\012':
568	2.10k	case '\015':
569	2.10k	break;
570	9	default:
571	9	ret = -1;
572	9	goto Exit;
573	2.11k	}
574	2.10k	break;
575	2.11k	}
576	3.51k	if (decoder->_hex_count == sizeof(size_t) * 2) {
577	1	ret = -1;
578	1	goto Exit;
579	1	}
580	3.51k	decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
581	3.51k	++decoder->_hex_count;
582	3.51k	}
583	2.10k	decoder->_hex_count = 0;
584	2.10k	decoder->_state = CHUNKED_IN_CHUNK_EXT;
585		/* fallthru */
586	2.18k	case CHUNKED_IN_CHUNK_EXT:
587		/* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
588	38.0k	for (;; ++src) {
589	38.0k	if (src == bufsz)
590	102	goto Exit;
591	37.9k	if (buf[src] == '\012')
592	2.08k	break;
593	37.9k	}
594	2.08k	++src;
595	2.08k	if (decoder->bytes_left_in_chunk == 0) {
596	289	if (decoder->consume_trailer) {
597	289	decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
598	289	break;
599	289	} else {
600	0	goto Complete;
601	0	}
602	289	}
603	1.79k	decoder->_state = CHUNKED_IN_CHUNK_DATA;
604		/* fallthru */
605	1.92k	case CHUNKED_IN_CHUNK_DATA: {
606	1.92k	size_t avail = bufsz - src;
607	1.92k	if (avail < decoder->bytes_left_in_chunk) {
608	280	if (dst != src)
609	226	memmove(buf + dst, buf + src, avail);
610	280	src += avail;
611	280	dst += avail;
612	280	decoder->bytes_left_in_chunk -= avail;
613	280	goto Exit;
614	280	}
615	1.64k	if (dst != src)
616	1.56k	memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
617	1.64k	src += decoder->bytes_left_in_chunk;
618	1.64k	dst += decoder->bytes_left_in_chunk;
619	1.64k	decoder->bytes_left_in_chunk = 0;
620	1.64k	decoder->_state = CHUNKED_IN_CHUNK_CRLF;
621	1.64k	}
622		/* fallthru */
623	1.73k	case CHUNKED_IN_CHUNK_CRLF:
624	1.94k	for (;; ++src) {
625	1.94k	if (src == bufsz)
626	123	goto Exit;
627	1.81k	if (buf[src] != '\015')
628	1.61k	break;
629	1.81k	}
630	1.61k	if (buf[src] != '\012') {
631	25	ret = -1;
632	25	goto Exit;
633	25	}
634	1.58k	++src;
635	1.58k	decoder->_state = CHUNKED_IN_CHUNK_SIZE;
636	1.58k	break;
637	2.20k	case CHUNKED_IN_TRAILERS_LINE_HEAD:
638	2.48k	for (;; ++src) {
639	2.48k	if (src == bufsz)
640	102	goto Exit;
641	2.37k	if (buf[src] != '\015')
642	2.10k	break;
643	2.37k	}
644	2.10k	if (buf[src++] == '\012')
645	214	goto Complete;
646	1.88k	decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
647		/* fallthru */
648	1.98k	case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
649	32.0k	for (;; ++src) {
650	32.0k	if (src == bufsz)
651	142	goto Exit;
652	31.9k	if (buf[src] == '\012')
653	1.83k	break;
654	31.9k	}
655	1.83k	++src;
656	1.83k	decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
657	1.83k	break;
658	0	default:
659	0	assert(!"decoder is corrupt");
660	4.82k	}
661	4.82k	}
662
663	214	Complete:
664	214	ret = bufsz - src;
665	1.11k	Exit:
666	1.11k	if (dst != src)
667	1.03k	memmove(buf + dst, buf + src, bufsz - src);
668	1.11k	*_bufsz = dst;
669		/* if incomplete but the overhead of the chunked encoding is >=100KB and >80%, signal an error */
670	1.11k	if (ret == -2) {
671	843	decoder->_total_overhead += bufsz - dst;
672	843	if (decoder->_total_overhead >= 100 * 1024 && decoder->_total_read - decoder->_total_overhead < decoder->_total_read / 4)
673	0	ret = -1;
674	843	}
675	1.11k	return ret;
676	214	}
677
678		int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
679	0	{
680	0	return decoder->_state == CHUNKED_IN_CHUNK_DATA;
681	0	}
682
683		#undef CHECK_EOF
684		#undef EXPECT_CHAR
685		#undef ADVANCE_TOKEN