/src/h2o/deps/picohttpparser/picohttpparser.c

Source (jump to first uncovered line)
/*
 * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
 *                         Shigeo Mitsunari
 *
 * The software is licensed under either the MIT License (below) or the Perl
 * license.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stddef.h>
#include <string.h>
#ifdef __SSE4_2__
#ifdef _MSC_VER
#include <nmmintrin.h>
#else
#include <x86intrin.h>
#endif
#endif
#include "picohttpparser.h"

/* $Id: 1b172063e1b60ba47601ed04c11390cc53235a01 $ */

/*
 * Branch-prediction hints. When __GNUC__ is 3 or greater they expand to
 * __builtin_expect with the condition normalised to 0/1 by `!!`; otherwise
 * they expand to the bare expression and have no effect.
 */
#if __GNUC__ >= 3
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif

/*
 * ALIGNED(n) requests n-byte alignment for the object being declared. It is
 * placed between the type and the declarator, e.g.
 *     static const char ALIGNED(16) table[16];
 * MSVC uses the documented __declspec spelling (the single-underscore
 * synonym is unavailable when language extensions are disabled); every other
 * compiler is assumed to understand the GNU attribute syntax.
 */
#ifdef _MSC_VER
#define ALIGNED(n) __declspec(align(n))
#else
#define ALIGNED(n) __attribute__((aligned(n)))
#endif

/*
 * True for bytes in the range 0x20 (SP) .. 0x7e ('~'). The byte is shifted
 * down by 0x20 in unsigned arithmetic so that anything below SP wraps to a
 * large value and a single comparison covers both bounds.
 */
#define IS_PRINTABLE_ASCII(c) (((unsigned char)(c) - 0x20u) < 0x5fu)

/*
 * CHECK_EOF(): if the cursor `buf` has reached `buf_end`, store -2
 * (incomplete input) in `*ret` and return NULL from the enclosing function.
 * Relies on `buf`, `buf_end` and `ret` being in scope at the call site.
 * Wrapped in do/while(0) so it behaves as a single statement.
 */
#define CHECK_EOF()                                                                                                                \
    do {                                                                                                                           \
        if (buf == buf_end) {                                                                                                      \
            *ret = -2;                                                                                                             \
            return NULL;                                                                                                           \
        }                                                                                                                          \
    } while (0)

/*
 * EXPECT_CHAR_NO_CHECK(ch): consume one byte from `buf` (the cursor is
 * advanced even on mismatch) and, if it is not `ch`, store -1 (parse error)
 * in `*ret` and return NULL from the enclosing function. The caller must
 * already know that `buf` is readable; no end-of-buffer check is made.
 * Wrapped in do/while(0) so it behaves as a single statement.
 */
#define EXPECT_CHAR_NO_CHECK(ch)                                                                                                   \
    do {                                                                                                                           \
        if (*buf++ != (ch)) {                                                                                                      \
            *ret = -1;                                                                                                             \
            return NULL;                                                                                                           \
        }                                                                                                                          \
    } while (0)

/*
 * EXPECT_CHAR(ch): bounds-checked variant of EXPECT_CHAR_NO_CHECK. Returns
 * NULL from the enclosing function with `*ret` = -2 when the input is
 * exhausted, or with `*ret` = -1 when the next byte is not `ch`.
 * Wrapped in do/while(0) so the two steps form a single statement.
 */
#define EXPECT_CHAR(ch)                                                                                                            \
    do {                                                                                                                           \
        CHECK_EOF();                                                                                                               \
        EXPECT_CHAR_NO_CHECK(ch);                                                                                                  \
    } while (0)

/*
 * ADVANCE_TOKEN(tok, toklen): scan the space-terminated token that starts at
 * the cursor `buf`.
 *
 * On success `tok` receives the token start, `toklen` its length, and `buf`
 * is left pointing at the terminating SP (the caller skips it).
 * Returns NULL from the enclosing function with `*ret` = -1 if a control
 * character (below 0x20) or DEL is met before the SP, or with `*ret` = -2 if
 * the input ends first. Bytes with the high bit set are accepted.
 *
 * `ranges2` holds two byte ranges (0x00-0x20 and 0x7f-0x7f) for
 * findchar_fast(). It is declared as a full, 16-byte-aligned 16-byte array
 * because the SSE4.2 path of findchar_fast() loads a whole 128-bit vector
 * from it; only the first 4 bytes are meaningful, hence the explicit length.
 */
#define ADVANCE_TOKEN(tok, toklen)                                                                                                 \
    do {                                                                                                                           \
        const char *tok_start = buf;                                                                                               \
        static const char ALIGNED(16) ranges2[16] = "\000\040\177\177";                                                            \
        int found2;                                                                                                                \
        buf = findchar_fast(buf, buf_end, ranges2, 4, &found2);                                                                    \
        if (!found2) {                                                                                                             \
            CHECK_EOF();                                                                                                           \
        }                                                                                                                          \
        while (1) {                                                                                                                \
            if (*buf == ' ') {                                                                                                     \
                break;                                                                                                             \
            } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {                                                                      \
                if ((unsigned char)*buf < '\040' || *buf == '\177') {                                                              \
                    *ret = -1;                                                                                                     \
                    return NULL;                                                                                                   \
                }                                                                                                                  \
            }                                                                                                                      \
            ++buf;                                                                                                                 \
            CHECK_EOF();                                                                                                           \
        }                                                                                                                          \
        (tok) = tok_start;                                                                                                         \
        (toklen) = buf - tok_start;                                                                                                \
    } while (0)

/*
 * Lookup table indexed by an unsigned byte value: 1 if the byte is allowed in
 * a header field name, 0 otherwise. The entries set to 1 are the ASCII
 * letters and digits plus ! # $ % & ' * + - . ^ _ ` | ~ ; every control
 * character, separator and byte >= 0x80 is 0.
 *
 * Declared as a const array (rather than a writable pointer to a literal) so
 * the table itself is the object and nothing can be re-pointed at run time.
 */
static const char token_char_map[] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
                                     "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
                                     "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
                                     "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
                                     "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
                                     "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
                                     "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
                                     "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";

/*
 * Vectorised search for the first byte of [buf, buf_end) that falls inside
 * one of the inclusive byte ranges listed pairwise in `ranges`
 * (`ranges_size` bytes are significant).
 *
 * With SSE4.2 the input is examined in whole 16-byte blocks only: on a hit
 * the returned pointer addresses the matching byte and *found is set to 1;
 * otherwise *found is 0 and the returned pointer addresses the first byte
 * that was not examined (fewer than 16 bytes before buf_end), which the
 * caller must scan itself. A full 16-byte vector is loaded from `ranges`
 * regardless of `ranges_size`.
 *
 * Without SSE4.2 nothing is scanned: *found is 0 and `buf` is returned as is.
 */
static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
{
    *found = 0;
#if __SSE4_2__
    if (likely(buf_end - buf >= 16)) {
        const __m128i needle = _mm_loadu_si128((const __m128i *)ranges);
        size_t remaining;

        /* at least one full block is available here, so the first test always passes */
        for (remaining = (buf_end - buf) & ~15; likely(remaining != 0); remaining -= 16, buf += 16) {
            const __m128i block = _mm_loadu_si128((const __m128i *)buf);
            const int idx =
                _mm_cmpestri(needle, ranges_size, block, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
            if (unlikely(idx != 16)) {
                /* idx is the offset of the first in-range byte within this block */
                buf += idx;
                *found = 1;
                break;
            }
        }
    }
#else
    /* nothing to do in the scalar build; silence unused-parameter warnings */
    (void)ranges_size;
    (void)ranges;
    (void)buf_end;
#endif
    return buf;
}

/*
 * Scans from `buf` to the end of the current line.
 *
 * Printable ASCII, HT and bytes with the high bit set are skipped. The first
 * other control character (or DEL) must begin the line terminator, which is
 * either CR LF or a bare LF.
 *
 * On success *token / *token_len describe the bytes before the terminator and
 * the returned pointer addresses the byte after it. On failure NULL is
 * returned with *ret = -1 (illegal control character, or CR not followed by
 * LF) or *ret = -2 (input exhausted before the line ended).
 */
static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
{
    const char *token_start = buf;

#ifdef __SSE4_2__
    /* findchar_fast() loads a whole 128-bit vector from this table, so it is
     * declared as an aligned, zero-padded 16-byte array; only the first 6
     * bytes (three ranges) are significant. */
    static const char ALIGNED(16) ranges1[16] = "\0\010"
                                                /* allow HT */
                                                "\012\037"
                                                /* allow SP and up to but not including DEL */
                                                "\177\177"
        /* allow chars w. MSB set */
        ;
    int found;
    buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
    if (found)
        goto FOUND_CTL;
#else
    /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
    while (likely(buf_end - buf >= 8)) {
#define DOIT()                                                                                                                     \
    do {                                                                                                                           \
        if (unlikely(!IS_PRINTABLE_ASCII(*buf)))                                                                                   \
            goto NonPrintable;                                                                                                     \
        ++buf;                                                                                                                     \
    } while (0)
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
#undef DOIT
        continue;
    NonPrintable:
        /* HT and high-bit bytes are not printable ASCII but are still allowed */
        if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
            goto FOUND_CTL;
        }
        ++buf;
    }
#endif
    /* byte-by-byte scan of whatever the fast path left over */
    for (;; ++buf) {
        CHECK_EOF();
        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
            if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
                goto FOUND_CTL;
            }
        }
    }
FOUND_CTL:
    if (likely(*buf == '\015')) {
        ++buf;
        EXPECT_CHAR('\012');
        /* buf is now past CR LF; exclude both from the token */
        *token_len = buf - 2 - token_start;
    } else if (*buf == '\012') {
        *token_len = buf - token_start;
        ++buf;
    } else {
        *ret = -1;
        return NULL;
    }
    *token = token_start;

    return buf;
}

/*
 * Cheap pre-check that the buffer contains an empty line (two consecutive
 * line terminators, each CR LF or bare LF), i.e. that a complete header
 * block has arrived.
 *
 * `last_len` is the amount of data the caller had already seen on a previous
 * attempt; scanning resumes three bytes before that point so a terminator
 * split across reads is still detected. If `last_len` reaches past the end
 * of the buffer the start position is clamped to `buf_end`, so the result is
 * "incomplete" rather than a read outside the buffer.
 *
 * Returns a pointer just past the empty line, or NULL with *ret = -2 when the
 * input ends first, or NULL with *ret = -1 when a CR is not followed by LF.
 */
static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
{
    int ret_cnt = 0;

    if (last_len >= 3) {
        size_t skip = last_len - 3;
        size_t avail = (size_t)(buf_end - buf);
        buf += skip < avail ? skip : avail;
    }

    while (buf != buf_end) {
        if (*buf == '\015') {
            ++buf;
            /* EXPECT_CHAR performs the end-of-buffer check itself */
            EXPECT_CHAR('\012');
            ++ret_cnt;
        } else if (*buf == '\012') {
            ++buf;
            ++ret_cnt;
        } else {
            ++buf;
            ret_cnt = 0;
        }
        if (ret_cnt == 2) {
            return buf;
        }
    }

    /* ran out of input before seeing an empty line */
    *ret = -2;
    return NULL;
}

/*
 * PARSE_INT(valp_, mul_): read one decimal digit at the cursor `buf`, store
 * digit * mul_ through valp_ and advance the cursor. If the byte is not a
 * digit, store -1 in `*ret` and return NULL from the enclosing function.
 * The caller must already know that `buf` is readable.
 * Wrapped in do/while(0) so it is a single statement; invoke it with a
 * trailing semicolon.
 */
#define PARSE_INT(valp_, mul_)                                                                                                     \
    do {                                                                                                                           \
        if (*buf < '0' || '9' < *buf) {                                                                                            \
            *ret = -1;                                                                                                             \
            return NULL;                                                                                                           \
        }                                                                                                                          \
        *(valp_) = (mul_) * (*buf++ - '0');                                                                                        \
    } while (0)

/*
 * PARSE_INT_3(valp_): read exactly three decimal digits at the cursor and
 * store their value (000-999) through valp_. Fails like PARSE_INT on the
 * first non-digit; in that case *valp_ may already hold a partial sum.
 * The caller must already know that three bytes are readable.
 */
#define PARSE_INT_3(valp_)                                                                                                         \
    do {                                                                                                                           \
        int res_ = 0;                                                                                                              \
        PARSE_INT(&res_, 100);                                                                                                     \
        *(valp_) = res_;                                                                                                           \
        PARSE_INT(&res_, 10);                                                                                                      \
        *(valp_) += res_;                                                                                                          \
        PARSE_INT(&res_, 1);                                                                                                       \
        *(valp_) += res_;                                                                                                          \
    } while (0)

/*
 * Parses the literal "HTTP/1." followed by a single digit, which is stored in
 * *minor_version.
 *
 * At least nine bytes must be available (the eight that are consumed plus one
 * more), so on success the returned pointer is always inside [buf, buf_end)
 * and may be dereferenced by the caller. Returns NULL with *ret = -2 when
 * fewer than nine bytes are available, or with *ret = -1 on any mismatch.
 */
static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
{
    static const char prefix[] = "HTTP/1.";
    const char *expected;

    /* we want at least [HTTP/1.<two chars>] to try to parse */
    if (buf_end - buf < 9) {
        *ret = -2;
        return NULL;
    }
    /* the length was verified above, so no per-byte end check is needed */
    for (expected = prefix; *expected != '\0'; ++expected) {
        EXPECT_CHAR_NO_CHECK(*expected);
    }
    PARSE_INT(minor_version, 1);
    return buf;
}

/*
 * Parses header lines starting at `buf` until an empty line (CR LF or bare
 * LF) is consumed.
 *
 * Each line is stored in headers[*num_headers] and *num_headers is
 * incremented; the counter is not reset here, so the caller must initialise
 * it. A line that starts with SP or HT, when at least one header has already
 * been stored, is recorded as a continuation line: name is NULL, name_len is
 * 0 and the value is the line content. Values have trailing SP/HT removed;
 * leading SP/HT is skipped only after the colon of a named header.
 *
 * Returns a pointer just past the terminating empty line, or NULL with
 * *ret = -1 (malformed header, empty name, or more than max_headers lines)
 * or *ret = -2 (input exhausted).
 */
static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
                                 size_t max_headers, int *ret)
{
    for (;; ++*num_headers) {
        CHECK_EOF();
        /* an empty line ends the header block */
        if (*buf == '\015') {
            ++buf;
            EXPECT_CHAR('\012');
            break;
        } else if (*buf == '\012') {
            ++buf;
            break;
        }
        /* no room left for another entry */
        if (*num_headers == max_headers) {
            *ret = -1;
            return NULL;
        }
        if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
            /* parsing name, but do not discard SP before colon, see
             * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
            headers[*num_headers].name = buf;
            /* byte ranges that cannot appear in a header name; used only by the SSE4.2 fast path */
            static const char ALIGNED(16) ranges1[] = "\x00 "  /* control chars and up to SP */
                                                      "\"\""   /* 0x22 */
                                                      "()"     /* 0x28,0x29 */
                                                      ",,"     /* 0x2c */
                                                      "//"     /* 0x2f */
                                                      ":@"     /* 0x3a-0x40 */
                                                      "[]"     /* 0x5b-0x5d */
                                                      "{\377"; /* 0x7b-0xff */
            int found;
            buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
            if (!found) {
                CHECK_EOF();
            }
            /* scalar scan up to the colon; any non-token byte before it is an error */
            while (1) {
                if (*buf == ':') {
                    break;
                } else if (!token_char_map[(unsigned char)*buf]) {
                    *ret = -1;
                    return NULL;
                }
                ++buf;
                CHECK_EOF();
            }
            /* a header name must not be empty */
            if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) {
                *ret = -1;
                return NULL;
            }
            /* step over the colon, then over any SP/HT that precedes the value */
            ++buf;
            for (;; ++buf) {
                CHECK_EOF();
                if (!(*buf == ' ' || *buf == '\t')) {
                    break;
                }
            }
        } else {
            /* continuation line: no name, the whole line becomes the value */
            headers[*num_headers].name = NULL;
            headers[*num_headers].name_len = 0;
        }
        const char *value;
        size_t value_len;
        if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) {
            return NULL;
        }
        /* remove trailing SPs and HTABs */
        const char *value_end = value + value_len;
        for (; value_end != value; --value_end) {
            const char c = *(value_end - 1);
            if (!(c == ' ' || c == '\t')) {
                break;
            }
        }
        headers[*num_headers].value = value;
        headers[*num_headers].value_len = value_end - value;
    }
    return buf;
}

/*
 * Parses a request line ("<method> SP <path> SP HTTP/1.<digit>" followed by
 * CR LF or bare LF) and then the header block.
 *
 * One leading empty line is tolerated before the request line. Method and
 * path are returned as pointer/length pairs into the input and must both be
 * non-empty. Returns the pointer produced by parse_headers() on success, or
 * NULL with *ret = -1 (malformed) or *ret = -2 (incomplete).
 */
static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
                                 size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
                                 size_t max_headers, int *ret)
{
    /* skip first empty line (some clients add CRLF after POST content) */
    CHECK_EOF();
    switch (*buf) {
    case '\015':
        ++buf;
        EXPECT_CHAR('\012');
        break;
    case '\012':
        ++buf;
        break;
    default:
        break;
    }

    /* method and path; each ADVANCE_TOKEN stops on the SP, which is then skipped */
    ADVANCE_TOKEN(*method, *method_len);
    ++buf;
    ADVANCE_TOKEN(*path, *path_len);
    ++buf;
    if (*method_len == 0 || *path_len == 0) {
        *ret = -1;
        return NULL;
    }

    /* protocol version; on success buf is guaranteed to be readable */
    buf = parse_http_version(buf, buf_end, minor_version, ret);
    if (buf == NULL) {
        return NULL;
    }

    /* the request line must end right after the version */
    switch (*buf) {
    case '\015':
        ++buf;
        EXPECT_CHAR('\012');
        break;
    case '\012':
        ++buf;
        break;
    default:
        *ret = -1;
        return NULL;
    }

    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}

/*
 * Parses an HTTP request held in buf_start[0 .. len).
 *
 * On entry *num_headers is the capacity of `headers`; on return it is the
 * number of entries filled in. All output parameters are reset before
 * parsing starts. `last_len` is the length that was passed on a previous,
 * incomplete attempt (0 for the first attempt).
 *
 * Returns the number of bytes consumed on success, otherwise the negative
 * status set by the failing helper: -1 for a parse error, -2 when the
 * request is incomplete.
 */
int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
                      size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *const buf_end = buf_start + len;
    const size_t max_headers = *num_headers;
    const char *cursor;
    int r;

    *method = NULL;
    *method_len = 0;
    *path = NULL;
    *path_len = 0;
    *minor_version = -1;
    *num_headers = 0;

    /* if last_len != 0, check if the request is complete (a fast countermeasure
       against slowloris) */
    if (last_len != 0) {
        if (is_complete(buf_start, buf_end, last_len, &r) == NULL) {
            return r;
        }
    }

    cursor = parse_request(buf_start, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers,
                           max_headers, &r);
    if (cursor == NULL) {
        return r;
    }

    return (int)(cursor - buf_start);
}

/*
 * Parses a status line ("HTTP/1.<digit> SP <3 digits> SP <message>" followed
 * by a line terminator) and then the header block.
 *
 * The status code is stored in *status and the message is returned as a
 * pointer/length pair into the input. Returns the pointer produced by
 * parse_headers() on success, or NULL with *ret = -1 (malformed) or
 * *ret = -2 (incomplete).
 */
static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
                                  size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
{
    /* parse "HTTP/1.x"; on success buf is guaranteed to be readable */
    buf = parse_http_version(buf, buf_end, minor_version, ret);
    if (buf == NULL) {
        return NULL;
    }

    /* exactly one space separates the version from the status code */
    if (*buf != ' ') {
        *ret = -1;
        return NULL;
    }
    ++buf;

    /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
    if (buf_end - buf < 4) {
        *ret = -2;
        return NULL;
    }
    PARSE_INT_3(status);

    /* the byte after the three digits is available thanks to the check above */
    if (*buf != ' ') {
        *ret = -1;
        return NULL;
    }
    ++buf;

    /* the message runs to the end of the line */
    buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret);
    if (buf == NULL) {
        return NULL;
    }

    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}

/*
 * Parses an HTTP response held in buf_start[0 .. len).
 *
 * On entry *num_headers is the capacity of `headers`; on return it is the
 * number of entries filled in. All output parameters are reset before
 * parsing starts. `last_len` is the length that was passed on a previous,
 * incomplete attempt (0 for the first attempt).
 *
 * Returns the number of bytes consumed on success, otherwise the negative
 * status set by the failing helper: -1 for a parse error, -2 when the
 * response is incomplete.
 */
int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
                       struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *const buf_end = buf_start + len;
    const size_t max_headers = *num_headers;
    const char *cursor;
    int r;

    *minor_version = -1;
    *status = 0;
    *msg = NULL;
    *msg_len = 0;
    *num_headers = 0;

    /* if last_len != 0, check if the response is complete (a fast countermeasure
       against slowloris) */
    if (last_len != 0) {
        if (is_complete(buf_start, buf_end, last_len, &r) == NULL) {
            return r;
        }
    }

    cursor = parse_response(buf_start, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r);
    if (cursor == NULL) {
        return r;
    }

    return (int)(cursor - buf_start);
}

/*
 * Parses a bare header block (no request or status line) held in
 * buf_start[0 .. len).
 *
 * On entry *num_headers is the capacity of `headers`; on return it is the
 * number of entries filled in. `last_len` is the length that was passed on a
 * previous, incomplete attempt (0 for the first attempt).
 *
 * Returns the number of bytes consumed on success, otherwise the negative
 * status set by the failing helper: -1 for a parse error, -2 when the block
 * is incomplete.
 */
int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *const buf_end = buf_start + len;
    const size_t max_headers = *num_headers;
    const char *cursor;
    int r;

    *num_headers = 0;

    /* if last_len != 0, check if the header block is complete (a fast
       countermeasure against slowloris) */
    if (last_len != 0) {
        if (is_complete(buf_start, buf_end, last_len, &r) == NULL) {
            return r;
        }
    }

    cursor = parse_headers(buf_start, buf_end, headers, num_headers, max_headers, &r);
    if (cursor == NULL) {
        return r;
    }

    return (int)(cursor - buf_start);
}

/* States of the chunked decoder, stored in the decoder's _state field. */
enum {
    /* reading the hexadecimal chunk-size digits */
    CHUNKED_IN_CHUNK_SIZE,
    /* skipping the rest of the chunk-size line (extensions) up to LF */
    CHUNKED_IN_CHUNK_EXT,
    /* copying chunk payload; bytes_left_in_chunk counts what remains */
    CHUNKED_IN_CHUNK_DATA,
    /* expecting the line terminator that follows the chunk payload */
    CHUNKED_IN_CHUNK_CRLF,
    /* at the start of a trailer line; an empty line completes decoding */
    CHUNKED_IN_TRAILERS_LINE_HEAD,
    /* inside a non-empty trailer line, skipping up to LF */
    CHUNKED_IN_TRAILERS_LINE_MIDDLE
};

/*
 * Converts one hexadecimal digit character (either case) to its value
 * 0..15. Returns -1 for anything that is not a hexadecimal digit.
 */
static int decode_hex(int ch)
{
    if (ch >= '0' && ch <= '9') {
        return ch - '0';
    }
    if (ch >= 'A' && ch <= 'F') {
        return 10 + (ch - 'A');
    }
    if (ch >= 'a' && ch <= 'f') {
        return 10 + (ch - 'a');
    }
    return -1;
}

/*
 * Decodes chunked transfer-encoded data in place.
 *
 * `buf` holds *_bufsz bytes of encoded input. Chunk payload is compacted to
 * the front of `buf` and, on return, *_bufsz is the number of decoded bytes
 * now at the start of `buf`. Progress is kept in `decoder`, so the function
 * can be called again with the next piece of input.
 *
 * Returns:
 *   -2  more input is needed (every byte supplied was consumed);
 *   -1  the input is malformed (no hex digit where a chunk size is expected,
 *       a chunk size with more than 2 * sizeof(size_t) hex digits, a missing
 *       LF after chunk data) or the decoder state is invalid;
 *  >=0  decoding finished; the value is the number of bytes that followed
 *       the end of the chunked body. Those bytes are moved so that they
 *       directly follow the decoded data in `buf`.
 *
 * When decoder->consume_trailer is non-zero, trailer lines after the final
 * zero-sized chunk are consumed up to and including the terminating empty
 * line; otherwise decoding completes right after the zero-size chunk line.
 */
ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
{
    /* dst: write position for decoded payload; src: read position in the encoded input */
    size_t dst = 0, src = 0, bufsz = *_bufsz;
    ssize_t ret = -2; /* incomplete */

    while (1) {
        switch (decoder->_state) {
        case CHUNKED_IN_CHUNK_SIZE:
            for (;; ++src) {
                int v;
                if (src == bufsz)
                    goto Exit;
                if ((v = decode_hex(buf[src])) == -1) {
                    /* a chunk-size line must start with at least one hex digit */
                    if (decoder->_hex_count == 0) {
                        ret = -1;
                        goto Exit;
                    }
                    break;
                }
                /* one more digit would overflow size_t */
                if (decoder->_hex_count == sizeof(size_t) * 2) {
                    ret = -1;
                    goto Exit;
                }
                decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
                ++decoder->_hex_count;
            }
            decoder->_hex_count = 0;
            decoder->_state = CHUNKED_IN_CHUNK_EXT;
        /* fallthru */
        case CHUNKED_IN_CHUNK_EXT:
            /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
            for (;; ++src) {
                if (src == bufsz)
                    goto Exit;
                if (buf[src] == '\012')
                    break;
            }
            ++src;
            /* a zero-sized chunk marks the end of the body */
            if (decoder->bytes_left_in_chunk == 0) {
                if (decoder->consume_trailer) {
                    decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
                    break;
                } else {
                    goto Complete;
                }
            }
            decoder->_state = CHUNKED_IN_CHUNK_DATA;
        /* fallthru */
        case CHUNKED_IN_CHUNK_DATA: {
            size_t avail = bufsz - src;
            if (avail < decoder->bytes_left_in_chunk) {
                /* only part of the chunk has arrived: keep what we have and wait for more */
                if (dst != src)
                    memmove(buf + dst, buf + src, avail);
                src += avail;
                dst += avail;
                decoder->bytes_left_in_chunk -= avail;
                goto Exit;
            }
            if (dst != src)
                memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
            src += decoder->bytes_left_in_chunk;
            dst += decoder->bytes_left_in_chunk;
            decoder->bytes_left_in_chunk = 0;
            decoder->_state = CHUNKED_IN_CHUNK_CRLF;
        }
        /* fallthru */
        case CHUNKED_IN_CHUNK_CRLF:
            /* skip any CRs, then require LF */
            for (;; ++src) {
                if (src == bufsz)
                    goto Exit;
                if (buf[src] != '\015')
                    break;
            }
            if (buf[src] != '\012') {
                ret = -1;
                goto Exit;
            }
            ++src;
            decoder->_state = CHUNKED_IN_CHUNK_SIZE;
            break;
        case CHUNKED_IN_TRAILERS_LINE_HEAD:
            for (;; ++src) {
                if (src == bufsz)
                    goto Exit;
                if (buf[src] != '\015')
                    break;
            }
            /* an empty line terminates the trailers */
            if (buf[src++] == '\012')
                goto Complete;
            decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
        /* fallthru */
        case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
            for (;; ++src) {
                if (src == bufsz)
                    goto Exit;
                if (buf[src] == '\012')
                    break;
            }
            ++src;
            decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
            break;
        default:
            assert(!"decoder is corrupt");
            /* with NDEBUG the assert compiles away; fail instead of looping forever */
            ret = -1;
            goto Exit;
        }
    }

Complete:
    ret = bufsz - src;
Exit:
    /* move any unconsumed input so that it directly follows the decoded data */
    if (dst != src)
        memmove(buf + dst, buf + src, bufsz - src);
    *_bufsz = dst;
    return ret;
}

/*
 * Returns 1 when the decoder's current state is CHUNKED_IN_CHUNK_DATA (it is
 * positioned inside a chunk's payload), 0 otherwise.
 */
int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
{
    if (decoder->_state == CHUNKED_IN_CHUNK_DATA) {
        return 1;
    }
    return 0;
}

/* Remove the parser helper macros so they do not stay defined past this point. */
#undef CHECK_EOF
#undef EXPECT_CHAR
#undef ADVANCE_TOKEN

Coverage Report

Created: 2023-06-07 06:21

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
3		* Shigeo Mitsunari
4		*
5		* The software is licensed under either the MIT License (below) or the Perl
6		* license.
7		*
8		* Permission is hereby granted, free of charge, to any person obtaining a copy
9		* of this software and associated documentation files (the "Software"), to
10		* deal in the Software without restriction, including without limitation the
11		* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
12		* sell copies of the Software, and to permit persons to whom the Software is
13		* furnished to do so, subject to the following conditions:
14		*
15		* The above copyright notice and this permission notice shall be included in
16		* all copies or substantial portions of the Software.
17		*
18		* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19		* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20		* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21		* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22		* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23		* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24		* IN THE SOFTWARE.
25		*/
26
27		#include <assert.h>
28		#include <stddef.h>
29		#include <string.h>
30		#ifdef __SSE4_2__
31		#ifdef _MSC_VER
32		#include <nmmintrin.h>
33		#else
34		#include <x86intrin.h>
35		#endif
36		#endif
37		#include "picohttpparser.h"
38
39		/* $Id: 1b172063e1b60ba47601ed04c11390cc53235a01 $ */
40
41		#if __GNUC__ >= 3
42	6.64k	#define likely(x) __builtin_expect(!!(x), 1)
43	5.97k	#define unlikely(x) __builtin_expect(!!(x), 0)
44		#else
45		#define likely(x) (x)
46		#define unlikely(x) (x)
47		#endif
48
49		#ifdef _MSC_VER
50		#define ALIGNED(n) _declspec(align(n))
51		#else
52		#define ALIGNED(n) __attribute__((aligned(n)))
53		#endif
54
55		#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
56
57		#define CHECK_EOF() \
58	11.9k	if (buf == buf_end) { \
59	0	*ret = -2; \
60	0	return NULL; \
61	0	}
62
63		#define EXPECT_CHAR_NO_CHECK(ch) \
64	6.64k	if (*buf++ != ch) { \
65	0	*ret = -1; \
66	0	return NULL; \
67	0	}
68
69		#define EXPECT_CHAR(ch) \
70	1.99k	CHECK_EOF(); \
71	1.99k	EXPECT_CHAR_NO_CHECK(ch);
72
73		#define ADVANCE_TOKEN(tok, toklen) \
74	0	do { \
75	0	const char *tok_start = buf; \
76	0	static const char ALIGNED(16) ranges2[] = "\000\040\177\177"; \
77	0	int found2; \
78	0	buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \
79	0	if (!found2) { \
80	0	CHECK_EOF(); \
81	0	} \
82	0	while (1) { \
83	0	if (*buf == ' ') { \
84	0	break; \
85	0	} else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
86	0	if ((unsigned char)*buf < '\040' || *buf == '\177') { \
87	0	*ret = -1; \
88	0	return NULL; \
89	0	} \
90	0	} \
91	0	++buf; \
92	0	CHECK_EOF(); \
93	0	} \
94	0	tok = tok_start; \
95	0	toklen = buf - tok_start; \
96	0	} while (0)
97
98		static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
99		"\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
100		"\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
101		"\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
102		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
103		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
104		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
105		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
106
107		static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
108	664	{
109	664	*found = 0;
110		#if __SSE4_2__
111		if (likely(buf_end - buf >= 16)) {
112		__m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
113
114		size_t left = (buf_end - buf) & ~15;
115		do {
116		__m128i b16 = _mm_loadu_si128((const __m128i *)buf);
117		int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
118		if (unlikely(r != 16)) {
119		buf += r;
120		*found = 1;
121		break;
122		}
123		buf += 16;
124		left -= 16;
125		} while (likely(left != 0));
126		}
127		#else
128		/* suppress unused parameter warning */
129	664	(void)buf_end;
130	664	(void)ranges;
131	664	(void)ranges_size;
132	664	#endif
133	664	return buf;
134	664	}
135
136		static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
137	1.32k	{
138	1.32k	const char *token_start = buf;
139
140		#ifdef __SSE4_2__
141		static const char ranges1[] = "\0\010"
142		/* allow HT */
143		"\012\037"
144		/* allow SP and up to but not including DEL */
145		"\177\177"
146		/* allow chars w. MSB set */
147		;
148		int found;
149		buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
150		if (found)
151		goto FOUND_CTL;
152		#else
153		/* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
154	1.32k	while (likely(buf_end - buf >= 8)) {
155	1.32k	#define DOIT() \
156	5.97k	do { \
157	5.97k	if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
158	5.97k	goto NonPrintable; \
159	5.97k	++buf; \
160	4.64k	} while (0)
161	1.32k	DOIT();
162	1.32k	DOIT();
163	1.32k	DOIT();
164	664	DOIT();
165	664	DOIT();
166	664	DOIT();
167	0	DOIT();
168	0	DOIT();
169	0	#undef DOIT
170	0	continue;
171	1.32k	NonPrintable:
172	1.32k	if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
173	1.32k	goto FOUND_CTL;
174	1.32k	}
175	0	++buf;
176	0	}
177	0	#endif
178	0	for (;; ++buf) {
179	0	CHECK_EOF();
180	0	if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
181	0	if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
182	0	goto FOUND_CTL;
183	0	}
184	0	}
185	0	}
186	1.32k	FOUND_CTL:
187	1.32k	if (likely(*buf == '\015')) {
188	1.32k	++buf;
189	2.65k	EXPECT_CHAR('\012');
190	2.65k	*token_len = buf - 2 - token_start;
191	2.65k	} else if (*buf == '\012') {
192	0	*token_len = buf - token_start;
193	0	++buf;
194	0	} else {
195	0	*ret = -1;
196	0	return NULL;
197	0	}
198	1.32k	*token = token_start;
199
200	1.32k	return buf;
201	1.32k	}
202
/*
 * Checks whether the buffer contains two consecutive line terminators
 * (each either CR LF or a bare LF), i.e. an empty line.
 *
 * When last_len is 3 or more the scan starts at buf + last_len - 3; otherwise
 * it starts at buf.
 *
 * Returns a pointer just past the second terminator. CHECK_EOF() and
 * EXPECT_CHAR() (defined earlier in this file) leave the function early;
 * presumably with *ret set and a NULL return -- confirm against their
 * definitions.
 */
static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
{
    int ret_cnt = 0;
    buf = last_len < 3 ? buf : buf + last_len - 3;

    while (1) {
        CHECK_EOF();
        if (*buf == '\015') {
            ++buf;
            CHECK_EOF();
            EXPECT_CHAR('\012');
            ++ret_cnt;
        } else if (*buf == '\012') {
            ++buf;
            ++ret_cnt;
        } else {
            /* any other byte breaks the run of terminators */
            ++buf;
            ret_cnt = 0;
        }
        if (ret_cnt == 2) {
            return buf;
        }
    }

    /* not reached: the loop above only leaves through a return */
    *ret = -2;
    return NULL;
}
230
/*
 * Parses one decimal digit at *buf and stores digit * mul_ through valp_,
 * advancing buf. On a non-digit it advances buf, sets *ret = -1 and returns
 * NULL from the enclosing function. Expects `buf` and `ret` to be in scope.
 * Wrapped in do/while(0) so it is a single statement; callers supply the `;`.
 */
#define PARSE_INT(valp_, mul_)                                                                                                     \
    do {                                                                                                                           \
        if (*buf < '0' || '9' < *buf) {                                                                                            \
            buf++;                                                                                                                 \
            *ret = -1;                                                                                                             \
            return NULL;                                                                                                           \
        }                                                                                                                          \
        *(valp_) = (mul_) * (*buf++ - '0');                                                                                        \
    } while (0)

/*
 * Parses exactly three decimal digits at *buf into *valp_ (hundreds, tens,
 * units). Error behaviour is that of PARSE_INT.
 */
#define PARSE_INT_3(valp_)                                                                                                         \
    do {                                                                                                                           \
        int res_ = 0;                                                                                                              \
        PARSE_INT(&res_, 100);                                                                                                     \
        *(valp_) = res_;                                                                                                           \
        PARSE_INT(&res_, 10);                                                                                                      \
        *(valp_) += res_;                                                                                                          \
        PARSE_INT(&res_, 1);                                                                                                       \
        *(valp_) += res_;                                                                                                          \
    } while (0)
249
/*
 * Parses the literal "HTTP/1." followed by one decimal digit, which is stored
 * in *minor_version.
 *
 * At least 9 bytes must be available, otherwise *ret is set to -2 and NULL is
 * returned. Only 8 bytes are consumed, so on success the returned pointer is
 * always within [buf, buf_end) and callers may read one byte from it without
 * a further bounds check. A non-digit minor version yields *ret = -1 and NULL
 * (see PARSE_INT); a mismatch in the literal is handled by
 * EXPECT_CHAR_NO_CHECK(), which is defined earlier in this file.
 */
static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
{
    /* we want at least [HTTP/1.<two chars>] to try to parse */
    if (buf_end - buf < 9) {
        *ret = -2;
        return NULL;
    }
    EXPECT_CHAR_NO_CHECK('H');
    EXPECT_CHAR_NO_CHECK('T');
    EXPECT_CHAR_NO_CHECK('T');
    EXPECT_CHAR_NO_CHECK('P');
    EXPECT_CHAR_NO_CHECK('/');
    EXPECT_CHAR_NO_CHECK('1');
    EXPECT_CHAR_NO_CHECK('.');
    PARSE_INT(minor_version, 1);
    return buf;
}
268
269		static const char parse_headers(const char buf, const char buf_end, struct phr_header headers, size_t *num_headers,
270		size_t max_headers, int *ret)
271	664	{
272	1.32k	for (;; ++*num_headers) {
273	1.32k	CHECK_EOF();
274	1.32k	if (*buf == '\015') {
275	664	++buf;
276	664	EXPECT_CHAR('\012');
277	664	break;
278	664	} else if (*buf == '\012') {
279	0	++buf;
280	0	break;
281	0	}
282	664	if (*num_headers == max_headers) {
283	0	*ret = -1;
284	0	return NULL;
285	0	}
286	664	if (!(num_headers != 0 && (buf == ' ' \|\| *buf == '\t'))) {
287		/* parsing name, but do not discard SP before colon, see
288		* http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
289	664	headers[*num_headers].name = buf;
290	664	static const char ALIGNED(16) ranges1[] = "\x00 " /* control chars and up to SP */
291	664	"\"\"" /* 0x22 */
292	664	"()" /* 0x28,0x29 */
293	664	",," /* 0x2c */
294	664	"//" /* 0x2f */
295	664	":@" /* 0x3a-0x40 */
296	664	"[]" /* 0x5b-0x5d */
297	664	"{\377"; /* 0x7b-0xff */
298	664	int found;
299	664	buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
300	664	if (!found) {
301	664	CHECK_EOF();
302	664	}
303	7.30k	while (1) {
304	7.30k	if (*buf == ':') {
305	664	break;
306	6.64k	} else if (!token_char_map[(unsigned char)*buf]) {
307	0	*ret = -1;
308	0	return NULL;
309	0	}
310	6.64k	++buf;
311	6.64k	CHECK_EOF();
312	6.64k	}
313	664	if ((headers[num_headers].name_len = buf - headers[num_headers].name) == 0) {
314	0	*ret = -1;
315	0	return NULL;
316	0	}
317	664	++buf;
318	1.32k	for (;; ++buf) {
319	1.32k	CHECK_EOF();
320	1.32k	if (!(buf == ' ' \|\| buf == '\t')) {
321	664	break;
322	664	}
323	1.32k	}
324	664	} else {
325	0	headers[*num_headers].name = NULL;
326	0	headers[*num_headers].name_len = 0;
327	0	}
328	664	const char *value;
329	664	size_t value_len;
330	664	if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) {
331	0	return NULL;
332	0	}
333		/* remove trailing SPs and HTABs */
334	664	const char *value_end = value + value_len;
335	664	for (; value_end != value; --value_end) {
336	664	const char c = *(value_end - 1);
337	664	if (!(c == ' ' \|\| c == '\t')) {
338	664	break;
339	664	}
340	664	}
341	664	headers[*num_headers].value = value;
342	664	headers[*num_headers].value_len = value_end - value;
343	664	}
344	664	return buf;
345	664	}
346
/*
 * Parses a request line ("<method> <path> HTTP/1.<digit>" plus line
 * terminator) followed by the header block.
 *
 * One leading empty line (CR LF or bare LF) is skipped first. *method /
 * *method_len and *path / *path_len are filled by ADVANCE_TOKEN(), which is
 * defined earlier in this file. An empty method or path, or a version that is
 * not followed by CR LF or LF, yields *ret = -1 and NULL.
 *
 * Returns the result of parse_headers(): the position just past the empty
 * line that ends the headers, or NULL with *ret set.
 */
static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
                                 size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
                                 size_t max_headers, int *ret)
{
    /* skip first empty line (some clients add CRLF after POST content) */
    CHECK_EOF();
    if (*buf == '\015') {
        ++buf;
        EXPECT_CHAR('\012');
    } else if (*buf == '\012') {
        ++buf;
    }

    /* parse request line */
    ADVANCE_TOKEN(*method, *method_len);
    ++buf;
    ADVANCE_TOKEN(*path, *path_len);
    ++buf;
    if (*method_len == 0 || *path_len == 0) {
        *ret = -1;
        return NULL;
    }
    if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
        return NULL;
    }
    /* parse_http_version leaves at least one readable byte at buf */
    if (*buf == '\015') {
        ++buf;
        EXPECT_CHAR('\012');
    } else if (*buf == '\012') {
        ++buf;
    } else {
        *ret = -1;
        return NULL;
    }

    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}
384
/*
 * Parses an HTTP request held in buf_start[0 .. len).
 *
 * On entry *num_headers is the capacity of headers[]; on return it is the
 * number of entries filled in. All output arguments are reset before parsing
 * (*method = NULL, *method_len = 0, *path = NULL, *path_len = 0,
 * *minor_version = -1, *num_headers = 0).
 *
 * last_len: when non-zero, is_complete() is run first and its error code is
 * returned if no empty line is found; is_complete() starts scanning at
 * buf + last_len - 3 when last_len >= 3.
 *
 * Returns the number of bytes consumed, or the negative code left in r by the
 * failing helper (the helpers in this file use -1 for malformed input and -2
 * for input that is too short).
 */
int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
                      size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *buf = buf_start, *buf_end = buf_start + len;
    size_t max_headers = *num_headers;
    int r;

    *method = NULL;
    *method_len = 0;
    *path = NULL;
    *path_len = 0;
    *minor_version = -1;
    *num_headers = 0;

    /* if last_len != 0, check if the request is complete (a fast countermeasure
       against slowloris) */
    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
        return r;
    }

    if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
                             &r)) == NULL) {
        return r;
    }

    return (int)(buf - buf_start);
}
412
/*
 * Parses a status line ("HTTP/1.<digit> <3-digit status> <message>" plus line
 * terminator) followed by the header block.
 *
 * *minor_version and *status receive the parsed numbers; *msg / *msg_len
 * describe the reason phrase inside the input buffer. A missing space after
 * the version or after the status code yields *ret = -1 and NULL. Fewer than
 * four bytes after the first space yields *ret = -2 and NULL.
 *
 * Returns the result of parse_headers(): the position just past the empty
 * line that ends the headers, or NULL with *ret set.
 */
static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
                                  size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
{
    /* parse "HTTP/1.x" */
    if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
        return NULL;
    }
    /* skip space (parse_http_version leaves at least one readable byte) */
    if (*buf++ != ' ') {
        *ret = -1;
        return NULL;
    }
    /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
    if (buf_end - buf < 4) {
        *ret = -2;
        return NULL;
    }
    PARSE_INT_3(status);

    /* skip space (the fourth byte checked for above) */
    if (*buf++ != ' ') {
        *ret = -1;
        return NULL;
    }
    /* get message */
    if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
        return NULL;
    }

    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}
444
/*
 * Parses an HTTP response held in buf_start[0 .. len).
 *
 * On entry *num_headers is the capacity of headers[]; on return it is the
 * number of entries filled in. All output arguments are reset before parsing
 * (*minor_version = -1, *status = 0, *msg = NULL, *msg_len = 0,
 * *num_headers = 0).
 *
 * last_len: when non-zero, is_complete() is run first and its error code is
 * returned if no empty line is found.
 *
 * Returns the number of bytes consumed, or the negative code left in r by the
 * failing helper (the helpers in this file use -1 for malformed input and -2
 * for input that is too short).
 */
int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
                       struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *buf = buf_start, *buf_end = buf + len;
    size_t max_headers = *num_headers;
    int r;

    *minor_version = -1;
    *status = 0;
    *msg = NULL;
    *msg_len = 0;
    *num_headers = 0;

    /* if last_len != 0, check if the response is complete (a fast countermeasure
       against slowloris) */
    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
        return r;
    }

    if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
        return r;
    }

    return (int)(buf - buf_start);
}
470
/*
 * Parses a bare header block (no request or status line) held in
 * buf_start[0 .. len).
 *
 * On entry *num_headers is the capacity of headers[]; on return it is the
 * number of entries filled in.
 *
 * last_len: when non-zero, is_complete() is run first and its error code is
 * returned if no empty line is found.
 *
 * Returns the number of bytes consumed, or the negative code left in r by the
 * failing helper (the helpers in this file use -1 for malformed input and -2
 * for input that is too short).
 */
int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *buf = buf_start, *buf_end = buf + len;
    size_t max_headers = *num_headers;
    int r;

    *num_headers = 0;

    /* if last_len != 0, check if the header block is complete (a fast countermeasure
       against slowloris) */
    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
        return r;
    }

    if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
        return r;
    }

    return (int)(buf - buf_start);
}
491
/* values of decoder->_state used by phr_decode_chunked() */
enum {
    CHUNKED_IN_CHUNK_SIZE,           /* reading the hexadecimal chunk size */
    CHUNKED_IN_CHUNK_EXT,            /* skipping the rest of the chunk-size line, up to LF */
    CHUNKED_IN_CHUNK_DATA,           /* copying chunk payload */
    CHUNKED_IN_CHUNK_CRLF,           /* expecting the line terminator after the payload */
    CHUNKED_IN_TRAILERS_LINE_HEAD,   /* at the start of a trailer line */
    CHUNKED_IN_TRAILERS_LINE_MIDDLE  /* inside a trailer line, skipping to LF */
};
500
/*
 * Converts one hexadecimal digit character to its value.
 *
 * Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f', or -1 for any other input.
 */
static int decode_hex(int ch)
{
    if ('0' <= ch && ch <= '9') {
        return ch - '0';
    } else if ('A' <= ch && ch <= 'F') {
        return ch - 'A' + 0xa;
    } else if ('a' <= ch && ch <= 'f') {
        return ch - 'a' + 0xa;
    } else {
        return -1;
    }
}
513
514		ssize_t phr_decode_chunked(struct phr_chunked_decoder decoder, char buf, size_t *_bufsz)
515	0	{
516	0	size_t dst = 0, src = 0, bufsz = *_bufsz;
517	0	ssize_t ret = -2; /* incomplete */
518
519	0	while (1) {
520	0	switch (decoder->_state) {
521	0	case CHUNKED_IN_CHUNK_SIZE:
522	0	for (;; ++src) {
523	0	int v;
524	0	if (src == bufsz)
525	0	goto Exit;
526	0	if ((v = decode_hex(buf[src])) == -1) {
527	0	if (decoder->_hex_count == 0) {
528	0	ret = -1;
529	0	goto Exit;
530	0	}
531	0	break;
532	0	}
533	0	if (decoder->_hex_count == sizeof(size_t) * 2) {
534	0	ret = -1;
535	0	goto Exit;
536	0	}
537	0	decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
538	0	++decoder->_hex_count;
539	0	}
540	0	decoder->_hex_count = 0;
541	0	decoder->_state = CHUNKED_IN_CHUNK_EXT;
542		/* fallthru */
543	0	case CHUNKED_IN_CHUNK_EXT:
544		/* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
545	0	for (;; ++src) {
546	0	if (src == bufsz)
547	0	goto Exit;
548	0	if (buf[src] == '\012')
549	0	break;
550	0	}
551	0	++src;
552	0	if (decoder->bytes_left_in_chunk == 0) {
553	0	if (decoder->consume_trailer) {
554	0	decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
555	0	break;
556	0	} else {
557	0	goto Complete;
558	0	}
559	0	}
560	0	decoder->_state = CHUNKED_IN_CHUNK_DATA;
561		/* fallthru */
562	0	case CHUNKED_IN_CHUNK_DATA: {
563	0	size_t avail = bufsz - src;
564	0	if (avail < decoder->bytes_left_in_chunk) {
565	0	if (dst != src)
566	0	memmove(buf + dst, buf + src, avail);
567	0	src += avail;
568	0	dst += avail;
569	0	decoder->bytes_left_in_chunk -= avail;
570	0	goto Exit;
571	0	}
572	0	if (dst != src)
573	0	memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
574	0	src += decoder->bytes_left_in_chunk;
575	0	dst += decoder->bytes_left_in_chunk;
576	0	decoder->bytes_left_in_chunk = 0;
577	0	decoder->_state = CHUNKED_IN_CHUNK_CRLF;
578	0	}
579		/* fallthru */
580	0	case CHUNKED_IN_CHUNK_CRLF:
581	0	for (;; ++src) {
582	0	if (src == bufsz)
583	0	goto Exit;
584	0	if (buf[src] != '\015')
585	0	break;
586	0	}
587	0	if (buf[src] != '\012') {
588	0	ret = -1;
589	0	goto Exit;
590	0	}
591	0	++src;
592	0	decoder->_state = CHUNKED_IN_CHUNK_SIZE;
593	0	break;
594	0	case CHUNKED_IN_TRAILERS_LINE_HEAD:
595	0	for (;; ++src) {
596	0	if (src == bufsz)
597	0	goto Exit;
598	0	if (buf[src] != '\015')
599	0	break;
600	0	}
601	0	if (buf[src++] == '\012')
602	0	goto Complete;
603	0	decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
604		/* fallthru */
605	0	case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
606	0	for (;; ++src) {
607	0	if (src == bufsz)
608	0	goto Exit;
609	0	if (buf[src] == '\012')
610	0	break;
611	0	}
612	0	++src;
613	0	decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
614	0	break;
615	0	default:
616	0	assert(!"decoder is corrupt");
617	0	}
618	0	}
619
620	0	Complete:
621	0	ret = bufsz - src;
622	0	Exit:
623	0	if (dst != src)
624	0	memmove(buf + dst, buf + src, bufsz - src);
625	0	*_bufsz = dst;
626	0	return ret;
627	0	}
628
629		int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
630	0	{
631	0	return decoder->_state == CHUNKED_IN_CHUNK_DATA;
632	0	}
633
/* remove the parsing helper macros so they are not visible past this point */
#undef CHECK_EOF
#undef EXPECT_CHAR
#undef ADVANCE_TOKEN