Coverage Report

Created: 2025-07-18 06:41

/src/h2o/lib/common/url.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2014,2015 DeNA Co., Ltd.
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a copy
5
 * of this software and associated documentation files (the "Software"), to
6
 * deal in the Software without restriction, including without limitation the
7
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8
 * sell copies of the Software, and to permit persons to whom the Software is
9
 * furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in
12
 * all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20
 * IN THE SOFTWARE.
21
 */
22
#include <inttypes.h>
23
#include <sys/socket.h>
24
#include <sys/types.h>
25
#include <sys/un.h>
26
#include "h2o/memory.h"
27
#include "h2o/string_.h"
28
#include "h2o/url.h"
29
30
/* Built-in URL schemes. Each initializer is { {name}, default port, flag }.
 * The flag is 1 only for https; judging from the masque note below it marks schemes that run over TLS
 * (NOTE(review): the field name is not visible in this file -- confirm against h2o/url.h). */
const h2o_url_scheme_t H2O_URL_SCHEME_HTTP = {{H2O_STRLIT("http")}, 80, 0};
31
const h2o_url_scheme_t H2O_URL_SCHEME_HTTPS = {{H2O_STRLIT("https")}, 443, 1};
32
/* 65535 is also the value this file stores in `_port` when no explicit port is given; presumably it stands for
 * "no well-known default port" here. */
const h2o_url_scheme_t H2O_URL_SCHEME_MASQUE = {{H2O_STRLIT("masque")}, 65535, 0 /* ??? masque might or might not be over TLS */};
33
const h2o_url_scheme_t H2O_URL_SCHEME_FASTCGI = {{H2O_STRLIT("fastcgi")}, 65535, 0};
34
35
/* Converts a single ASCII hexadecimal digit into its value (0..15).
 * Both upper- and lower-case letters are accepted; any other input yields -1. */
static int decode_hex(int ch)
{
    if (ch >= '0' && ch <= '9') {
        return ch - '0';
    } else if (ch >= 'A' && ch <= 'F') {
        return 0xa + (ch - 'A');
    } else if (ch >= 'a' && ch <= 'f') {
        return 0xa + (ch - 'a');
    }
    return -1;
}
45
46
/* Inspects the path segment that has just been completed in `path` and reports how many trailing bytes must be
 * dropped to eliminate a "." or ".." segment.
 *
 * @param path        buffer being built; must start with '/'
 * @param off         number of bytes currently in the buffer
 * @param last_slash  index of the '/' that opens the segment under inspection
 * @return number of bytes to remove from the end of the buffer (0 when the segment is an ordinary one)
 *
 * For "." only the dot is dropped. For ".." both dots are dropped and, unless that leaves just the root "/",
 * the preceding segment is dropped as well (the '/' in front of it is kept). */
static size_t handle_special_paths(const char *path, size_t off, size_t last_slash)
{
    const size_t segment_len = off - last_slash; /* includes the opening '/' */
    size_t new_off = off;

    if (segment_len == 2) {
        if (path[off - 1] == '.')
            new_off = off - 1;
    } else if (segment_len == 3) {
        if (path[off - 2] == '.' && path[off - 1] == '.') {
            new_off = off - 2;
            if (new_off > 1) {
                /* step over the '/' that opened "..", then back up to just after the previous '/' */
                --new_off;
                while (path[new_off - 1] != '/')
                    --new_off;
            }
        }
    }

    return off - new_off;
}
61
62
/* Perform path normalization and URL decoding in one pass.
63
 * See h2o_req_t for the purpose of @norm_indexes. */
64
static h2o_iovec_t rebuild_path(h2o_mem_pool_t *pool, const char *src, size_t src_len, size_t *query_at, size_t **norm_indexes)
65
2.35k
{
66
2.35k
    char *dst;
67
2.35k
    size_t src_off = 0, dst_off = 0, last_slash, rewind;
68
69
2.35k
    { /* locate '?', and set len to the end of input path */
70
2.35k
        const char *q = memchr(src, '?', src_len);
71
2.35k
        if (q != NULL) {
72
206
            src_len = *query_at = q - src;
73
2.15k
        } else {
74
2.15k
            *query_at = SIZE_MAX;
75
2.15k
        }
76
2.35k
    }
77
78
    /* dst can be 1 byte more than src if src is missing the prefixing '/' */
79
2.35k
    dst = h2o_mem_alloc_pool(pool, char, src_len + 1);
80
2.35k
    *norm_indexes = h2o_mem_alloc_pool(pool, *norm_indexes[0], (src_len + 1));
81
82
2.35k
    if (src[0] == '/')
83
1.01k
        src_off++;
84
2.35k
    last_slash = dst_off;
85
2.35k
    dst[dst_off] = '/';
86
2.35k
    (*norm_indexes)[dst_off] = src_off;
87
2.35k
    dst_off++;
88
89
    /* decode %xx */
90
64.2k
    while (src_off < src_len) {
91
61.8k
        int hi, lo;
92
61.8k
        char decoded;
93
94
61.8k
        if (src[src_off] == '%' && (src_off + 2 < src_len) && (hi = decode_hex(src[src_off + 1])) != -1 &&
95
61.8k
            (lo = decode_hex(src[src_off + 2])) != -1) {
96
1.00k
            decoded = (hi << 4) | lo;
97
1.00k
            src_off += 3;
98
60.8k
        } else {
99
60.8k
            decoded = src[src_off++];
100
60.8k
        }
101
61.8k
        if (decoded == '/') {
102
3.43k
            rewind = handle_special_paths(dst, dst_off, last_slash);
103
3.43k
            if (rewind > 0) {
104
252
                dst_off -= rewind;
105
252
                last_slash = dst_off - 1;
106
252
                continue;
107
252
            }
108
3.18k
            last_slash = dst_off;
109
3.18k
        }
110
61.6k
        dst[dst_off] = decoded;
111
61.6k
        (*norm_indexes)[dst_off] = src_off;
112
61.6k
        dst_off++;
113
61.6k
    }
114
2.35k
    rewind = handle_special_paths(dst, dst_off, last_slash);
115
2.35k
    dst_off -= rewind;
116
117
2.35k
    return h2o_iovec_init(dst, dst_off);
118
2.35k
}
119
120
/* Returns the normalized form of the path portion of `path` (the part before '?').
 *
 * `*query_at` receives the offset of the first '?' in `path`, or SIZE_MAX if there is none.
 * `*norm_indexes` is set to NULL unless the path had to be rewritten, in which case it is filled by
 * rebuild_path().
 *
 * An empty input yields "/". A path that starts with '/' and contains neither "/." nor a '%' (other than as its
 * very last byte) before the query is returned as-is, pointing into `path`. Anything else is rebuilt using
 * memory from `pool`; if the rebuilt path fails the sanity checks, an error is logged and "/" is returned. */
h2o_iovec_t h2o_url_normalize_path(h2o_mem_pool_t *pool, const char *path, size_t len, size_t *query_at, size_t **norm_indexes)
{
    *query_at = SIZE_MAX;
    *norm_indexes = NULL;

    if (len == 0)
        return h2o_iovec_init("/", 1);

    if (path[0] == '/') {
        int needs_rewrite = 0;
        size_t i;

        /* fast path: scan for anything that might require rewriting (false positives are fine) */
        for (i = 0; i != len; ++i) {
            const char c = path[i];
            const int has_next = i + 1 < len;
            if (c == '?') {
                *query_at = i;
                break;
            }
            if (has_next && (c == '%' || (c == '/' && path[i + 1] == '.'))) {
                needs_rewrite = 1;
                break;
            }
        }

        if (!needs_rewrite) {
            /* `i` is either the position of '?' or the input length */
            h2o_iovec_t as_is;
            as_is.base = (char *)path;
            as_is.len = i;
            return as_is;
        }
    }

    /* slow path */
    h2o_iovec_t ret = rebuild_path(pool, path, len, query_at, norm_indexes);

    /* sanity checks: non-empty, rooted, and free of any remaining ".." segment */
    if (ret.len != 0 && ret.base[0] == '/' && h2o_strstr(ret.base, ret.len, H2O_STRLIT("/../")) == SIZE_MAX &&
        !(ret.len >= 3 && memcmp(ret.base + ret.len - 3, "/..", 3) == 0))
        return ret;

    h2o_error_printf("failed to normalize path: `%.*s` => `%.*s`\n", (int)len, path, (int)ret.len, ret.base);
    return h2o_iovec_init("/", 1);
}
174
175
static const char *parse_scheme(const char *s, const char *end, const h2o_url_scheme_t **scheme)
176
1
{
177
1
    if (end - s >= 5 && memcmp(s, "http:", 5) == 0) {
178
1
        *scheme = &H2O_URL_SCHEME_HTTP;
179
1
        return s + 5;
180
1
    } else if (end - s >= 6 && memcmp(s, "https:", 6) == 0) {
181
0
        *scheme = &H2O_URL_SCHEME_HTTPS;
182
0
        return s + 6;
183
0
    } else if (end - s >= 7 && memcmp(s, "masque:", 7) == 0) {
184
0
        *scheme = &H2O_URL_SCHEME_MASQUE;
185
0
        return s + 7;
186
0
    }
187
0
    return NULL;
188
1
}
189
190
/* Parses the "host[:port]" found at the beginning of [s, s + len).
 *
 * @param s     input; need not be NUL-terminated
 * @param len   number of bytes available at `s`
 * @param host  receives the host, pointing into `s`; for the bracketed form ("[...]") the brackets are excluded
 * @param port  receives the port number, or 65535 if the input carries no ":port" part
 * @return pointer to the first byte following the host and port (either the end of input, or whatever follows;
 *         after a port that is always '/' or '?'), or NULL if the input is malformed
 *
 * The input is rejected when it is empty, when the host is empty, when a '[' has no matching ']', when the port
 * is 65535 or greater, or when the port contains a byte other than a decimal digit. A ':' followed by no digits
 * yields port 0. */
const char *h2o_url_parse_hostport(const char *s, size_t len, h2o_iovec_t *host, uint16_t *port)
{
    const char *token_start = s, *token_end, *end = s + len;

    *port = 65535;

    if (token_start == end)
        return NULL;

    if (*token_start == '[') {
        /* bracketed literal (e.g. an IPv6 address); the host is whatever sits between '[' and ']' */
        ++token_start;
        if ((token_end = memchr(token_start, ']', end - token_start)) == NULL)
            return NULL;
        *host = h2o_iovec_init(token_start, token_end - token_start);
        token_start = token_end + 1;
    } else {
        /* the host runs up to the first '/', '?', ':' or the end of input */
        for (token_end = token_start; !(token_end == end || *token_end == '/' || *token_end == '?' || *token_end == ':');
             ++token_end)
            ;
        *host = h2o_iovec_init(token_start, token_end - token_start);
        token_start = token_end;
    }

    /* disallow zero-length host */
    if (host->len == 0)
        return NULL;

    /* parse port */
    if (token_start != end && *token_start == ':') {
        uint32_t p = 0;
        for (++token_start; token_start != end; ++token_start) {
            if ('0' <= *token_start && *token_start <= '9') {
                p = p * 10 + *token_start - '0';
                /* 65535 is reserved as the "no port" marker, hence >= rather than > */
                if (p >= 65535)
                    return NULL;
            } else if (*token_start == '/' || *token_start == '?') {
                break;
            } else {
                /* a stray byte inside the port used to be skipped silently, letting digits that follow it be
                 * folded into the port number; treat it as a parse error instead */
                return NULL;
            }
        }
        *port = (uint16_t)p;
    }

    return token_start;
}
235
236
/* Fills the authority, host, port and path of `parsed` from [src, url_end), where `src` points right after the
 * "//" of a URL. Returns 0 on success, -1 if the input is malformed.
 *
 * The path points into the input except when the authority is directly followed by '?', in which case a new
 * string with "/" prepended is built via h2o_concat() using `pool`. A missing path becomes "/". */
static int parse_authority_and_path(h2o_mem_pool_t *pool, const char *src, const char *url_end, h2o_url_t *parsed)
{
    const char *rest = h2o_url_parse_hostport(src, url_end - src, &parsed->host, &parsed->_port);
    if (rest == NULL)
        return -1;
    parsed->authority = h2o_iovec_init(src, rest - src);

    if (rest == url_end) {
        parsed->path = h2o_iovec_init(H2O_STRLIT("/"));
        return 0;
    }

    switch (*rest) {
    case '/':
        parsed->path = h2o_iovec_init(rest, url_end - rest);
        return 0;
    case '?':
        /* query without a path */
        parsed->path = h2o_concat(pool, h2o_iovec_init(H2O_STRLIT("/")), h2o_iovec_init(rest, url_end - rest));
        return 0;
    default:
        return -1;
    }
}
253
254
/* Parses an absolute URL of the form "<scheme>://<authority>[<path>]" into `parsed`.
 *
 * @param url_len  length of `url`; pass SIZE_MAX to have it determined with strlen()
 * @return 0 on success; -1 if the scheme is not one recognized by parse_scheme(), if "//" does not follow the
 *         scheme, or if the authority / path cannot be parsed */
int h2o_url_parse(h2o_mem_pool_t *pool, const char *url, size_t url_len, h2o_url_t *parsed)
{
    if (url_len == SIZE_MAX)
        url_len = strlen(url);
    const char *const url_end = url + url_len;

    /* scheme is mandatory */
    const char *after_scheme = parse_scheme(url, url_end, &parsed->scheme);
    if (after_scheme == NULL)
        return -1;

    /* so is the "//" that introduces the authority */
    if (url_end - after_scheme < 2 || after_scheme[0] != '/' || after_scheme[1] != '/')
        return -1;

    return parse_authority_and_path(pool, after_scheme + 2, url_end, parsed);
}
273
274
/* Parses a URL reference that may omit the scheme and / or the authority.
 *
 * @param url_len  length of `url`; pass SIZE_MAX to have it determined with strlen()
 * @return 0 on success, -1 if an authority is present but cannot be parsed
 *
 * `parsed->scheme` becomes NULL when no recognized scheme prefix is found. When the reference has no "//",
 * authority and host are cleared, the port is set to 65535, and the remainder of the input is stored as the
 * path without further interpretation. */
int h2o_url_parse_relative(h2o_mem_pool_t *pool, const char *url, size_t url_len, h2o_url_t *parsed)
{
    if (url_len == SIZE_MAX)
        url_len = strlen(url);
    const char *const url_end = url + url_len;

    /* the scheme is optional */
    const char *rest = parse_scheme(url, url_end, &parsed->scheme);
    if (rest == NULL) {
        parsed->scheme = NULL;
        rest = url;
    }

    /* "//" introduces an authority */
    const int has_authority = url_end - rest >= 2 && rest[0] == '/' && rest[1] == '/';
    if (has_authority)
        return parse_authority_and_path(pool, rest + 2, url_end, parsed);

    /* no authority: everything that is left is the path */
    parsed->authority = h2o_iovec_init(NULL, 0);
    parsed->host = h2o_iovec_init(NULL, 0);
    parsed->_port = 65535;
    parsed->path = h2o_iovec_init(rest, url_end - rest);

    return 0;
}
300
301
/* Resolves `relative` against `base`, stores the components of the result in `dest` and returns the complete
 * URL as one string built with h2o_concat() using `pool`.
 *
 * `base->path` must be non-empty and start with '/' (asserted). When `relative` is NULL, `dest` is a copy of
 * `base` and the returned string is simply `base` serialized. Otherwise scheme and authority are taken from
 * `relative` when it has them and from `base` when it does not, and the path is combined using
 * h2o_url_resolve_path().
 *
 * On return the base pointers of `dest->authority`, `dest->host` and `dest->path` point into the returned
 * string. */
h2o_iovec_t h2o_url_resolve(h2o_mem_pool_t *pool, const h2o_url_t *base, const h2o_url_t *relative, h2o_url_t *dest)
{
    h2o_iovec_t head, tail, url;

    assert(base->path.len != 0);
    assert(base->path.base[0] == '/');

    if (relative == NULL) {
        /* nothing to resolve; serialize base as-is */
        *dest = *base;
        head = base->path;
        tail = h2o_iovec_init(NULL, 0);
    } else {
        /* scheme */
        dest->scheme = relative->scheme != NULL ? relative->scheme : base->scheme;

        /* authority, host and port travel together */
        if (relative->authority.base == NULL) {
            assert(relative->host.base == NULL);
            assert(relative->_port == 65535);
            dest->authority = base->authority;
            dest->host = base->host;
            dest->_port = base->_port;
        } else {
            assert(relative->host.base != NULL);
            dest->authority = relative->authority;
            dest->host = relative->host;
            dest->_port = relative->_port;
        }

        /* path: `head` is the part kept from base, `tail` the part kept from relative */
        head = base->path;
        if (relative->path.base == NULL) {
            assert(relative->path.len == 0);
            tail = (h2o_iovec_t){NULL};
        } else {
            tail = relative->path;
            h2o_url_resolve_path(&head, &tail);
        }
    }

    /* serialize */
    url = h2o_concat(pool, dest->scheme->name, h2o_iovec_init(H2O_STRLIT("://")), dest->authority, head, tail);

    /* re-point the components of dest into the serialized string; 3 is the length of "://" */
    dest->authority.base = url.base + dest->scheme->name.len + 3;
    dest->host.base = dest->authority.base;
    if (dest->authority.len != 0 && dest->authority.base[0] == '[')
        ++dest->host.base; /* host excludes the opening bracket */
    dest->path.base = dest->authority.base + dest->authority.len;
    dest->path.len = url.base + url.len - dest->path.base;

    return url;
}
356
357
/* Adjusts `base` and `relative` in place so that concatenating the two yields the resolved path.
 *
 * If `relative` starts with '/', `base` is emptied and `relative` is left as it is. Otherwise `base` is cut
 * right after its last '/', and leading "./" and "../" segments (as well as a trailing "." or "..") are
 * stripped from `relative`; each ".." additionally removes one more directory from `base`, stopping at the
 * root.
 *
 * NOTE(review): the backward scans rely on `base` containing a '/' (h2o_url_resolve asserts that it starts
 * with one); nothing in this function checks that. */
void h2o_url_resolve_path(h2o_iovec_t *base, h2o_iovec_t *relative)
{
    size_t keep = base->len; /* number of bytes of base to retain */
    size_t consumed = 0;     /* number of leading bytes of relative to drop */

    if (relative->len != 0 && relative->base[0] == '/') {
        /* absolute path replaces base entirely */
        keep = 0;
    } else {
        /* drop the last segment of base; `keep` ends up as the index of the last '/' */
        do {
            --keep;
        } while (base->base[keep] != '/');

        for (;;) {
            const size_t remaining = relative->len - consumed;
            if (remaining == 0)
                break;

            const char *seg = relative->base + consumed;
            if (seg[0] != '.')
                break;

            if (remaining >= 2 && seg[1] == '.' && (remaining == 2 || seg[2] == '/')) {
                /* ".." or "../": go up one directory unless already at the root */
                if (keep != 0) {
                    do {
                        --keep;
                    } while (base->base[keep] != '/');
                }
                consumed += remaining == 2 ? 2 : 3;
            } else if (remaining == 1) {
                /* trailing "." */
                consumed += 1;
            } else if (seg[1] == '/') {
                /* "./" */
                consumed += 2;
            } else {
                /* an ordinary segment that merely starts with a dot */
                break;
            }
        }

        /* retain the '/' itself */
        keep += 1;
    }

    base->len = keep;
    *relative = h2o_iovec_init(relative->base + consumed, relative->len - consumed);
}
394
395
/* Copies `src` into `dest`. The scheme pointer and the port are copied by value, whereas authority, host and
 * path are each duplicated separately with h2o_strdup() using `pool`. */
void h2o_url_copy(h2o_mem_pool_t *pool, h2o_url_t *dest, const h2o_url_t *src)
{
    /* scalar members */
    dest->scheme = src->scheme;
    dest->_port = src->_port;

    /* string members; each gets storage of its own */
    dest->authority = h2o_strdup(pool, src->authority.base, src->authority.len);
    dest->host = h2o_strdup(pool, src->host.base, src->host.len);
    dest->path = h2o_strdup(pool, src->path.base, src->path.len);
}
403
404
/* Converts a host of the form "unix:<path>" into a unix-domain socket address.
 *
 * @param host  host component of a URL (without surrounding brackets)
 * @param sa    filled on success: zero-cleared, then sun_family set to AF_UNIX and <path> copied to sun_path
 * @return NULL on success; h2o_url_host_to_sun_err_is_not_unix_socket if `host` does not start with "unix:";
 *         otherwise a pointer to a static error message
 *
 * <path> must be shorter than sizeof(sa->sun_path) so that sun_path stays NUL-terminated. */
const char *h2o_url_host_to_sun(h2o_iovec_t host, struct sockaddr_un *sa)
{
#define PREFIX "unix:"

    if (host.len < sizeof(PREFIX) - 1 || memcmp(host.base, PREFIX, sizeof(PREFIX) - 1) != 0)
        return h2o_url_host_to_sun_err_is_not_unix_socket;

    /* The parentheses matter: `host.len - sizeof(PREFIX) - 1` would subtract 7 instead of 5, letting paths of
     * sizeof(sun_path) or sizeof(sun_path) + 1 bytes through (unterminated / overflowing copy below) and
     * wrapping around for paths shorter than 2 bytes. */
    size_t path_len = host.len - (sizeof(PREFIX) - 1);
    if (path_len >= sizeof(sa->sun_path))
        return "unix-domain socket path is too long";

    memset(sa, 0, sizeof(*sa));
    sa->sun_family = AF_UNIX;
    memcpy(sa->sun_path, host.base + sizeof(PREFIX) - 1, path_len);
    return NULL;

#undef PREFIX
}
421
422
/* Error value returned by h2o_url_host_to_sun() when the host lacks the "unix:" prefix. Being a named object,
 * it can be told apart from that function's other error message by comparing pointers. */
const char h2o_url_host_to_sun_err_is_not_unix_socket[] = "supplied name does not look like an unix-domain socket";
423
424
/* Initializes `url` from separate scheme, host, port and path values.
 *
 * @param pool  pool to allocate the authority from; when NULL, the "host:port" string is obtained with
 *              h2o_mem_alloc() instead
 * @param path  stored as-is (not copied)
 * @return 0 on success, -1 if the port number could not be formatted
 *
 * When `port` equals the default port of the scheme, the authority is just a copy of `host` and `_port` is set
 * to 65535. Otherwise the authority becomes "<host>:<port>", with `url->host` referring to its leading
 * <host> part. */
int h2o_url_init_with_hostport(h2o_url_t *url, h2o_mem_pool_t *pool, const h2o_url_scheme_t *scheme, h2o_iovec_t host,
                               uint16_t port, h2o_iovec_t path)
{
    url->scheme = scheme;
    url->path = path;

    if (port == scheme->default_port) {
        /* default port is left out of the authority */
        url->_port = 65535;
        url->authority = h2o_strdup(pool, host.base, host.len);
        url->host = url->authority;
        return 0;
    }

    url->_port = port;

    char port_str[sizeof(H2O_UINT16_LONGEST_STR)];
    int port_str_len = sprintf(port_str, "%" PRIu16, port);
    if (port_str_len < 0)
        return -1;

    /* authority = host ":" port (not NUL-terminated) */
    size_t authority_len = host.len + 1 + port_str_len;
    char *authority = pool == NULL ? h2o_mem_alloc(authority_len) : h2o_mem_alloc_pool(pool, char, authority_len);
    memcpy(authority, host.base, host.len);
    authority[host.len] = ':';
    memcpy(authority + host.len + 1, port_str, port_str_len);

    url->authority = h2o_iovec_init(authority, authority_len);
    url->host = h2o_iovec_init(authority, authority_len - 1 - port_str_len);

    return 0;
}
451
452
/* Initializes `url` so that it refers to a unix-domain socket.
 *
 * The authority becomes "[unix:<sun_path>]" and `url->host` the same string without the brackets; `_port` is
 * set to 65535. The authority is allocated from `pool`, or with h2o_mem_alloc() when `pool` is NULL. `path` is
 * stored as-is. Always returns 0. */
int h2o_url_init_with_sun_path(h2o_url_t *url, h2o_mem_pool_t *pool, const h2o_url_scheme_t *scheme, h2o_iovec_t sun_path,
                               h2o_iovec_t path)
{
    static const char prefix[] = "[unix:";
    static const char suffix[] = "]";
    const size_t prefix_len = sizeof(prefix) - 1, suffix_len = sizeof(suffix) - 1;

    url->scheme = scheme;
    url->path = path;
    url->_port = 65535;

    /* authority = "[unix:" sun_path "]" (not NUL-terminated) */
    size_t authority_len = prefix_len + suffix_len + sun_path.len;
    char *authority = pool == NULL ? h2o_mem_alloc(authority_len) : h2o_mem_alloc_pool(pool, char, authority_len);
    memcpy(authority, prefix, prefix_len);
    memcpy(authority + prefix_len, sun_path.base, sun_path.len);
    memcpy(authority + authority_len - 1, suffix, suffix_len);
    url->authority = h2o_iovec_init(authority, authority_len);

    /* host is the authority without the enclosing brackets */
    url->host = h2o_iovec_init(authority + 1, authority_len - 2);

    return 0;
}