Coverage Report

Created: 2026-05-30 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/h2o/lib/common/url.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2014,2015 DeNA Co., Ltd.
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a copy
5
 * of this software and associated documentation files (the "Software"), to
6
 * deal in the Software without restriction, including without limitation the
7
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8
 * sell copies of the Software, and to permit persons to whom the Software is
9
 * furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in
12
 * all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20
 * IN THE SOFTWARE.
21
 */
22
#include <inttypes.h>
23
#include <sys/socket.h>
24
#include <sys/types.h>
25
#include <sys/un.h>
26
#include "h2o/memory.h"
27
#include "h2o/string_.h"
28
#include "h2o/url.h"
29
30
const h2o_url_scheme_t H2O_URL_SCHEME_HTTP = {{H2O_STRLIT("http")}, 80, 0};
31
const h2o_url_scheme_t H2O_URL_SCHEME_HTTPS = {{H2O_STRLIT("https")}, 443, 1};
32
const h2o_url_scheme_t H2O_URL_SCHEME_MASQUE = {{H2O_STRLIT("masque")}, 65535, 0 /* ??? masque might or might not be over TLS */};
33
const h2o_url_scheme_t H2O_URL_SCHEME_FASTCGI = {{H2O_STRLIT("fastcgi")}, 65535, 0};
34
35
static int decode_hex(int ch)
36
152k
{
37
152k
    if ('0' <= ch && ch <= '9')
38
14.7k
        return ch - '0';
39
137k
    if ('A' <= ch && ch <= 'F')
40
18.9k
        return ch - 'A' + 0xa;
41
118k
    if ('a' <= ch && ch <= 'f')
42
27.9k
        return ch - 'a' + 0xa;
43
90.6k
    return -1;
44
118k
}
45
46
static size_t handle_special_paths(const char *path, size_t off, size_t last_slash)
47
57.5k
{
48
57.5k
    size_t orig_off = off, part_size = off - last_slash;
49
50
57.5k
    if (part_size == 2 && path[off - 1] == '.') {
51
2.19k
        --off;
52
55.3k
    } else if (part_size == 3 && path[off - 2] == '.' && path[off - 1] == '.') {
53
2.77k
        off -= 2;
54
2.77k
        if (off > 1) {
55
89.6k
            for (--off; path[off - 1] != '/'; --off)
56
87.5k
                ;
57
2.10k
        }
58
2.77k
    }
59
57.5k
    return orig_off - off;
60
57.5k
}
61
62
/* Perform path normalization and URL decoding in one pass.
63
 * See h2o_req_t for the purpose of @norm_indexes. */
64
static h2o_iovec_t rebuild_path(h2o_mem_pool_t *pool, const char *src, size_t src_len, size_t *query_at, size_t **norm_indexes,
65
                                int *null_char_found)
66
13.4k
{
67
13.4k
    char *dst;
68
13.4k
    size_t src_off = 0, dst_off = 0, last_slash, rewind;
69
70
13.4k
    { /* locate '?', and set len to the end of input path */
71
13.4k
        const char *q = memchr(src, '?', src_len);
72
13.4k
        if (q != NULL) {
73
727
            src_len = *query_at = q - src;
74
12.7k
        } else {
75
12.7k
            *query_at = SIZE_MAX;
76
12.7k
        }
77
13.4k
    }
78
79
    /* dst can be 1 byte more than src if src is missing the prefixing '/' */
80
13.4k
    dst = h2o_mem_alloc_pool(pool, char, src_len + 1);
81
13.4k
    *norm_indexes = h2o_mem_alloc_pool(pool, *norm_indexes[0], (src_len + 1));
82
83
13.4k
    if (src[0] == '/')
84
3.25k
        src_off++;
85
13.4k
    last_slash = dst_off;
86
13.4k
    dst[dst_off] = '/';
87
13.4k
    (*norm_indexes)[dst_off] = src_off;
88
13.4k
    dst_off++;
89
90
    /* decode %xx */
91
1.76M
    while (src_off < src_len) {
92
1.74M
        int hi, lo;
93
1.74M
        char decoded;
94
95
1.74M
        if (src[src_off] == '%' && (src_off + 2 < src_len) && (hi = decode_hex(src[src_off + 1])) != -1 &&
96
50.9k
            (lo = decode_hex(src[src_off + 2])) != -1) {
97
10.7k
            decoded = (hi << 4) | lo;
98
10.7k
            src_off += 3;
99
1.73M
        } else {
100
1.73M
            decoded = src[src_off++];
101
1.73M
        }
102
1.74M
        if (decoded == '/') {
103
44.1k
            rewind = handle_special_paths(dst, dst_off, last_slash);
104
44.1k
            if (rewind > 0) {
105
4.43k
                dst_off -= rewind;
106
4.43k
                last_slash = dst_off - 1;
107
4.43k
                continue;
108
4.43k
            }
109
39.6k
            last_slash = dst_off;
110
39.6k
        }
111
1.74M
        dst[dst_off] = decoded;
112
1.74M
        (*norm_indexes)[dst_off] = src_off;
113
1.74M
        if (decoded == '\0')
114
11.2k
            *null_char_found = 1;
115
1.74M
        dst_off++;
116
1.74M
    }
117
13.4k
    rewind = handle_special_paths(dst, dst_off, last_slash);
118
13.4k
    dst_off -= rewind;
119
120
13.4k
    return h2o_iovec_init(dst, dst_off);
121
13.4k
}
122
123
h2o_iovec_t h2o_url_normalize_path(h2o_mem_pool_t *pool, const char *path, size_t len, size_t *query_at, size_t **norm_indexes,
124
                                   int *null_char_found)
125
30.3k
{
126
30.3k
    h2o_iovec_t ret;
127
128
30.3k
    *query_at = SIZE_MAX;
129
30.3k
    *norm_indexes = NULL;
130
30.3k
    *null_char_found = 0;
131
132
30.3k
    if (len == 0) {
133
2.10k
        ret = h2o_iovec_init("/", 1);
134
2.10k
        return ret;
135
2.10k
    }
136
137
28.2k
    const char *p = path, *end = path + len;
138
139
28.2k
    if (path[0] != '/')
140
10.1k
        goto Rewrite;
141
142
222k
    for (; p + 1 < end; ++p) {
143
208k
        if ((p[0] == '/' && p[1] == '.') || p[0] == '%' || p[0] == '\0') {
144
            /* detect false positives as well */
145
3.23k
            goto Rewrite;
146
205k
        } else if (p[0] == '?') {
147
918
            *query_at = p - path;
148
918
            goto Return;
149
918
        }
150
208k
    }
151
27.7k
    for (; p < end; ++p) {
152
13.9k
        if (p[0] == '\0') {
153
14
            goto Rewrite;
154
13.9k
        } else if (p[0] == '?') {
155
94
            *query_at = p - path;
156
94
            goto Return;
157
94
        }
158
13.9k
    }
159
160
14.8k
Return:
161
14.8k
    ret.base = (char *)path;
162
14.8k
    ret.len = p - path;
163
14.8k
    return ret;
164
165
13.4k
Rewrite:
166
13.4k
    ret = rebuild_path(pool, path, len, query_at, norm_indexes, null_char_found);
167
13.4k
    if (ret.len == 0)
168
0
        goto RewriteError;
169
13.4k
    if (ret.base[0] != '/')
170
0
        goto RewriteError;
171
13.4k
    if (h2o_strstr(ret.base, ret.len, H2O_STRLIT("/../")) != SIZE_MAX)
172
0
        goto RewriteError;
173
13.4k
    if (ret.len >= 3 && memcmp(ret.base + ret.len - 3, "/..", 3) == 0)
174
0
        goto RewriteError;
175
13.4k
    return ret;
176
0
RewriteError:
177
0
    h2o_error_printf("failed to normalize path: `%.*s` => `%.*s`\n", (int)len, path, (int)ret.len, ret.base);
178
0
    ret = h2o_iovec_init("/", 1);
179
0
    return ret;
180
13.4k
}
181
182
static const char *parse_scheme(const char *s, const char *end, const h2o_url_scheme_t **scheme)
183
7.91k
{
184
7.91k
    if (end - s >= 5 && memcmp(s, "http:", 5) == 0) {
185
1.15k
        *scheme = &H2O_URL_SCHEME_HTTP;
186
1.15k
        return s + 5;
187
6.76k
    } else if (end - s >= 6 && memcmp(s, "https:", 6) == 0) {
188
21
        *scheme = &H2O_URL_SCHEME_HTTPS;
189
21
        return s + 6;
190
6.74k
    } else if (end - s >= 7 && memcmp(s, "masque:", 7) == 0) {
191
54
        *scheme = &H2O_URL_SCHEME_MASQUE;
192
54
        return s + 7;
193
54
    }
194
6.69k
    return NULL;
195
7.91k
}
196
197
const char *h2o_url_parse_hostport(const char *s, size_t len, h2o_iovec_t *host, uint16_t *port)
198
4.99k
{
199
4.99k
    const char *token_start = s, *token_end, *end = s + len;
200
201
4.99k
    *port = 65535;
202
203
4.99k
    if (token_start == end)
204
42
        return NULL;
205
206
4.95k
    if (*token_start == '[') {
207
        /* is IPv6 address */
208
4.12k
        ++token_start;
209
4.12k
        if ((token_end = memchr(token_start, ']', end - token_start)) == NULL)
210
36
            return NULL;
211
4.08k
        *host = h2o_iovec_init(token_start, token_end - token_start);
212
4.08k
        token_start = token_end + 1;
213
4.08k
    } else {
214
4.92M
        for (token_end = token_start; !(token_end == end || *token_end == '/' || *token_end == '?' || *token_end == ':');
215
4.92M
             ++token_end)
216
4.92M
            ;
217
830
        *host = h2o_iovec_init(token_start, token_end - token_start);
218
830
        token_start = token_end;
219
830
    }
220
221
    /* disallow zero-length host */
222
4.91k
    if (host->len == 0)
223
39
        return NULL;
224
225
    /* parse port */
226
4.87k
    if (token_start != end && *token_start == ':') {
227
395
        uint32_t p = 0;
228
50.4k
        for (++token_start; token_start != end; ++token_start) {
229
50.2k
            if ('0' <= *token_start && *token_start <= '9') {
230
1.28k
                p = p * 10 + *token_start - '0';
231
1.28k
                if (p >= 65535)
232
67
                    return NULL;
233
48.9k
            } else if (*token_start == '/' || *token_start == '?') {
234
119
                break;
235
119
            }
236
50.2k
        }
237
328
        *port = (uint16_t)p;
238
328
    }
239
240
4.81k
    return token_start;
241
4.87k
}
242
243
static int parse_authority_and_path(h2o_mem_pool_t *pool, const char *src, const char *url_end, h2o_url_t *parsed)
244
973
{
245
973
    const char *p = h2o_url_parse_hostport(src, url_end - src, &parsed->host, &parsed->_port);
246
973
    if (p == NULL)
247
184
        return -1;
248
789
    parsed->authority = h2o_iovec_init(src, p - src);
249
789
    if (p == url_end) {
250
400
        parsed->path = h2o_iovec_init(H2O_STRLIT("/"));
251
400
    } else if (*p == '/') {
252
121
        parsed->path = h2o_iovec_init(p, url_end - p);
253
268
    } else if (*p == '?') {
254
207
        parsed->path = h2o_concat(pool, h2o_iovec_init(H2O_STRLIT("/")), h2o_iovec_init(p, url_end - p));
255
207
    } else {
256
61
        return -1;
257
61
    }
258
728
    return 0;
259
789
}
260
261
int h2o_url_parse(h2o_mem_pool_t *pool, const char *url, size_t url_len, h2o_url_t *parsed)
262
7.91k
{
263
7.91k
    const char *url_end, *p;
264
265
7.91k
    if (url_len == SIZE_MAX)
266
0
        url_len = strlen(url);
267
7.91k
    url_end = url + url_len;
268
269
    /* check and skip scheme */
270
7.91k
    if ((p = parse_scheme(url, url_end, &parsed->scheme)) == NULL)
271
6.69k
        return -1;
272
273
    /* skip "//" */
274
1.22k
    if (!(url_end - p >= 2 && p[0] == '/' && p[1] == '/'))
275
252
        return -1;
276
973
    p += 2;
277
278
973
    return parse_authority_and_path(pool, p, url_end, parsed);
279
1.22k
}
280
281
int h2o_url_parse_relative(h2o_mem_pool_t *pool, const char *url, size_t url_len, h2o_url_t *parsed)
282
0
{
283
0
    const char *url_end, *p;
284
285
0
    if (url_len == SIZE_MAX)
286
0
        url_len = strlen(url);
287
0
    url_end = url + url_len;
288
289
    /* obtain scheme and port number */
290
0
    if ((p = parse_scheme(url, url_end, &parsed->scheme)) == NULL) {
291
0
        parsed->scheme = NULL;
292
0
        p = url;
293
0
    }
294
295
    /* handle "//" */
296
0
    if (url_end - p >= 2 && p[0] == '/' && p[1] == '/')
297
0
        return parse_authority_and_path(pool, p + 2, url_end, parsed);
298
299
    /* reset authority, host, port, and set path */
300
0
    parsed->authority = (h2o_iovec_t){NULL};
301
0
    parsed->host = (h2o_iovec_t){NULL};
302
0
    parsed->_port = 65535;
303
0
    parsed->path = h2o_iovec_init(p, url_end - p);
304
305
0
    return 0;
306
0
}
307
308
h2o_iovec_t h2o_url_resolve(h2o_mem_pool_t *pool, const h2o_url_t *base, const h2o_url_t *relative, h2o_url_t *dest)
309
0
{
310
0
    h2o_iovec_t base_path, relative_path, ret;
311
312
0
    assert(base->path.len != 0);
313
0
    assert(base->path.base[0] == '/');
314
315
0
    if (relative == NULL) {
316
        /* build URL using base copied to dest */
317
0
        *dest = *base;
318
0
        base_path = base->path;
319
0
        relative_path = h2o_iovec_init(NULL, 0);
320
0
        goto Build;
321
0
    }
322
323
    /* scheme */
324
0
    dest->scheme = relative->scheme != NULL ? relative->scheme : base->scheme;
325
326
    /* authority (and host:port) */
327
0
    if (relative->authority.base != NULL) {
328
0
        assert(relative->host.base != NULL);
329
0
        dest->authority = relative->authority;
330
0
        dest->host = relative->host;
331
0
        dest->_port = relative->_port;
332
0
    } else {
333
0
        assert(relative->host.base == NULL);
334
0
        assert(relative->_port == 65535);
335
0
        dest->authority = base->authority;
336
0
        dest->host = base->host;
337
0
        dest->_port = base->_port;
338
0
    }
339
340
    /* path */
341
0
    base_path = base->path;
342
0
    if (relative->path.base != NULL) {
343
0
        relative_path = relative->path;
344
0
        h2o_url_resolve_path(&base_path, &relative_path);
345
0
    } else {
346
0
        assert(relative->path.len == 0);
347
0
        relative_path = (h2o_iovec_t){NULL};
348
0
    }
349
350
0
Build:
351
    /* build the output */
352
0
    ret = h2o_concat(pool, dest->scheme->name, h2o_iovec_init(H2O_STRLIT("://")), dest->authority, base_path, relative_path);
353
    /* adjust dest */
354
0
    dest->authority.base = ret.base + dest->scheme->name.len + 3;
355
0
    dest->host.base = dest->authority.base;
356
0
    if (dest->authority.len != 0 && dest->authority.base[0] == '[')
357
0
        ++dest->host.base;
358
0
    dest->path.base = dest->authority.base + dest->authority.len;
359
0
    dest->path.len = ret.base + ret.len - dest->path.base;
360
361
0
    return ret;
362
0
}
363
364
void h2o_url_resolve_path(h2o_iovec_t *base, h2o_iovec_t *relative)
365
0
{
366
0
    size_t base_path_len = base->len, rel_path_offset = 0;
367
368
0
    if (relative->len != 0 && relative->base[0] == '/') {
369
0
        base_path_len = 0;
370
0
    } else {
371
        /* relative path */
372
0
        while (base->base[--base_path_len] != '/')
373
0
            ;
374
0
        while (rel_path_offset != relative->len) {
375
0
            if (relative->base[rel_path_offset] == '.') {
376
0
                if (relative->len - rel_path_offset >= 2 && relative->base[rel_path_offset + 1] == '.' &&
377
0
                    (relative->len - rel_path_offset == 2 || relative->base[rel_path_offset + 2] == '/')) {
378
0
                    if (base_path_len != 0) {
379
0
                        while (base->base[--base_path_len] != '/')
380
0
                            ;
381
0
                    }
382
0
                    rel_path_offset += relative->len - rel_path_offset == 2 ? 2 : 3;
383
0
                    continue;
384
0
                }
385
0
                if (relative->len - rel_path_offset == 1) {
386
0
                    rel_path_offset += 1;
387
0
                    continue;
388
0
                } else if (relative->base[rel_path_offset + 1] == '/') {
389
0
                    rel_path_offset += 2;
390
0
                    continue;
391
0
                }
392
0
            }
393
0
            break;
394
0
        }
395
0
        base_path_len += 1;
396
0
    }
397
398
0
    base->len = base_path_len;
399
0
    *relative = h2o_iovec_init(relative->base + rel_path_offset, relative->len - rel_path_offset);
400
0
}
401
402
void h2o_url_copy(h2o_mem_pool_t *pool, h2o_url_t *dest, const h2o_url_t *src)
403
3
{
404
3
    dest->scheme = src->scheme;
405
3
    dest->authority = h2o_strdup(pool, src->authority.base, src->authority.len);
406
3
    dest->host = h2o_strdup(pool, src->host.base, src->host.len);
407
3
    dest->path = h2o_strdup(pool, src->path.base, src->path.len);
408
3
    dest->_port = src->_port;
409
3
}
410
411
const char *h2o_url_host_to_sun(h2o_iovec_t host, struct sockaddr_un *sa)
412
3
{
413
18
#define PREFIX "unix:"
414
415
3
    if (host.len < sizeof(PREFIX) - 1 || memcmp(host.base, PREFIX, sizeof(PREFIX) - 1) != 0)
416
0
        return h2o_url_host_to_sun_err_is_not_unix_socket;
417
418
3
    if (host.len - sizeof(PREFIX) - 1 >= sizeof(sa->sun_path))
419
0
        return "unix-domain socket path is too long";
420
421
3
    memset(sa, 0, sizeof(*sa));
422
3
    sa->sun_family = AF_UNIX;
423
3
    memcpy(sa->sun_path, host.base + sizeof(PREFIX) - 1, host.len - (sizeof(PREFIX) - 1));
424
3
    return NULL;
425
426
3
#undef PREFIX
427
3
}
428
429
const char h2o_url_host_to_sun_err_is_not_unix_socket[] = "supplied name does not look like an unix-domain socket";
430
431
int h2o_url_init_with_hostport(h2o_url_t *url, h2o_mem_pool_t *pool, const h2o_url_scheme_t *scheme, h2o_iovec_t host,
432
                               uint16_t port, h2o_iovec_t path)
433
0
{
434
0
    url->scheme = scheme;
435
0
    url->path = path;
436
437
0
    if (port == scheme->default_port) {
438
0
        url->_port = 65535;
439
0
        url->authority = h2o_strdup(pool, host.base, host.len);
440
0
        url->host = url->authority;
441
0
    } else {
442
0
        url->_port = port;
443
0
        char _port[sizeof(H2O_UINT16_LONGEST_STR)];
444
0
        int port_len = sprintf(_port, "%" PRIu16, port);
445
0
        if (port_len < 0)
446
0
            return -1;
447
448
0
        url->authority.len = host.len + 1 + port_len;
449
0
        url->authority.base = pool == NULL ? h2o_mem_alloc(url->authority.len) : h2o_mem_alloc_pool(pool, char, url->authority.len);
450
0
        memcpy(url->authority.base, host.base, host.len);
451
0
        memcpy(url->authority.base + host.len, ":", 1);
452
0
        memcpy(url->authority.base + host.len + 1, _port, port_len);
453
0
        url->host = h2o_iovec_init(url->authority.base, url->authority.len - 1 - port_len);
454
0
    }
455
456
0
    return 0;
457
0
}
458
459
int h2o_url_init_with_sun_path(h2o_url_t *url, h2o_mem_pool_t *pool, const h2o_url_scheme_t *scheme, h2o_iovec_t sun_path,
460
                               h2o_iovec_t path)
461
0
{
462
0
    url->scheme = scheme;
463
0
    url->path = path;
464
0
    url->_port = 65535;
465
466
0
#define PREFIX "[unix:"
467
0
#define SUFFIX "]"
468
0
    url->authority.len = strlen(PREFIX SUFFIX) + sun_path.len;
469
0
    url->authority.base = pool == NULL ? h2o_mem_alloc(url->authority.len) : h2o_mem_alloc_pool(pool, char, url->authority.len);
470
0
    memcpy(url->authority.base, PREFIX, sizeof(PREFIX) - 1);
471
0
    memcpy(url->authority.base + sizeof(PREFIX) - 1, sun_path.base, sun_path.len);
472
0
    memcpy(url->authority.base + url->authority.len - 1, SUFFIX, sizeof(SUFFIX) - 1);
473
0
#undef PREFIX
474
0
#undef SUFFIX
475
476
0
    url->host = h2o_iovec_init(url->authority.base + 1, url->authority.len - 2);
477
478
0
    return 0;
479
0
}