Coverage Report

Created: 2025-08-26 06:24

/src/httpd/srclib/apr/uri/apr_uri.c
Line
Count
Source (jump to first uncovered line)
1
/* Licensed to the Apache Software Foundation (ASF) under one or more
2
 * contributor license agreements.  See the NOTICE file distributed with
3
 * this work for additional information regarding copyright ownership.
4
 * The ASF licenses this file to You under the Apache License, Version 2.0
5
 * (the "License"); you may not use this file except in compliance with
6
 * the License.  You may obtain a copy of the License at
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
/*
18
 * apr_uri.c: URI related utility things
19
 *
20
 */
21
22
#include <stdlib.h>
23
24
#include "apu.h"
25
#include "apr.h"
26
#include "apr_general.h"
27
#include "apr_strings.h"
28
29
#define APR_WANT_STRFUNC
30
#include "apr_want.h"
31
32
#include "apr_uri.h"
33
34
typedef struct schemes_t schemes_t;
35
36
/** Structure to store various schemes and their default ports */
37
struct schemes_t {
38
    /** The name of the scheme */
39
    const char *name;
40
    /** The default port for the scheme */
41
    apr_port_t default_port;
42
};
43
44
/* Some WWW schemes and their default ports; this is basically /etc/services */
45
/* This will become global when the protocol abstraction comes */
46
/* As the schemes are searched by a linear search, */
47
/* they are sorted by their expected frequency */
48
static schemes_t schemes[] =
49
{
50
    {"http",     APR_URI_HTTP_DEFAULT_PORT},
51
    {"ftp",      APR_URI_FTP_DEFAULT_PORT},
52
    {"https",    APR_URI_HTTPS_DEFAULT_PORT},
53
    {"gopher",   APR_URI_GOPHER_DEFAULT_PORT},
54
    {"ldap",     APR_URI_LDAP_DEFAULT_PORT},
55
    {"nntp",     APR_URI_NNTP_DEFAULT_PORT},
56
    {"snews",    APR_URI_SNEWS_DEFAULT_PORT},
57
    {"imap",     APR_URI_IMAP_DEFAULT_PORT},
58
    {"pop",      APR_URI_POP_DEFAULT_PORT},
59
    {"sip",      APR_URI_SIP_DEFAULT_PORT},
60
    {"rtsp",     APR_URI_RTSP_DEFAULT_PORT},
61
    {"wais",     APR_URI_WAIS_DEFAULT_PORT},
62
    {"z39.50r",  APR_URI_WAIS_DEFAULT_PORT},
63
    {"z39.50s",  APR_URI_WAIS_DEFAULT_PORT},
64
    {"prospero", APR_URI_PROSPERO_DEFAULT_PORT},
65
    {"nfs",      APR_URI_NFS_DEFAULT_PORT},
66
    {"tip",      APR_URI_TIP_DEFAULT_PORT},
67
    {"acap",     APR_URI_ACAP_DEFAULT_PORT},
68
    {"telnet",   APR_URI_TELNET_DEFAULT_PORT},
69
    {"ssh",      APR_URI_SSH_DEFAULT_PORT},
70
    { NULL, 0xFFFF }     /* unknown port */
71
};
72
73
/*
74
 * *only* for IPv6 addresses with a zone identifier according to RFC6874
75
 */
76
static apr_status_t detect_scope_zone_id(int *have_zone_id, char const *ipv6addr,
77
                                         size_t len)
78
353
{
79
353
    char *s;
80
81
353
    *have_zone_id = 0;
82
83
353
    if (len < 3) {
84
        /* Need *at least* the three characters for a percent-encoded percent
85
         * sign.
86
         */
87
18
        return APR_SUCCESS;
88
18
    }
89
90
335
    s = memchr(ipv6addr, '%', len);
91
335
    if (s != NULL && s < ipv6addr + len - 2) {
92
        /* RFC3986 is pretty specific about how to percent encode, but
93
         * decoding is to be performed per component, which is what we
94
         * already have here. On the other hand, RFC6874 is clear that
95
         * the delimiter for a zone identifier must be a percent encoded
96
         * percent, i.e. "%25". Any other percent-encoded character is
97
         * invalid here.
98
         */
99
124
        if (s[1] != '2' || s[2] != '5') {
100
112
            return APR_EINVAL;
101
112
        }
102
12
        *have_zone_id = 1;
103
12
    }
104
223
    return APR_SUCCESS;
105
335
}
106
107
/*
 * Replace the first "%25" in hostname with a bare '%', in place.
 *
 * RFC6874 is a little hand-wavy in terms of what to decode. Technically,
 * all percent-encoded characters should be decoded, but also, the RFC states
 * that they SHOULD not occur, basically.
 *
 * So let's assume they don't, to keep things simple. Because otherwise we'd
 * have to deal with full RFC3986 rules and perform UTF-8 decoding as well
 * and all that.
 *
 * hostname  NUL-terminated, writable host string; shortened by two bytes
 *           when a '%' followed by at least two more bytes is present.
 *
 * The two bytes after the '%' are not inspected; the caller is expected to
 * have verified that they are "25".  A string without '%', or with fewer
 * than two bytes after it, is left untouched.
 */
static void percent_decode_scope_zone_id(char *hostname)
{
    size_t len = strlen(hostname);
    char *pct = memchr(hostname, '%', len);
    size_t offset;

    if (pct == NULL) {
        /* Nothing to decode; avoids pointer arithmetic on NULL. */
        return;
    }

    offset = (size_t)(pct - hostname);
    if (len - offset < 3) {
        /* Not enough bytes for "%25"; avoids a size_t underflow below. */
        return;
    }

    /* Slide the tail (including the terminating NUL) over the "25". */
    memmove(pct + 1, pct + 3, len - offset - 2);
}
126
127
/*
 * Inverse of the decode step: return uptr->hostname with its first '%'
 * written as "%25".
 *
 * p     pool the encoded copy is allocated from
 * uptr  URI whose hostname is read; it is not modified
 *
 * Returns uptr->hostname itself when it contains no '%'; otherwise a new
 * NUL-terminated string from p that is two bytes longer than the original.
 * Only the first '%' is encoded.
 */
static char * percent_encode_scope_zone_id(apr_pool_t *p, apr_uri_t const *uptr)
{
    const char *host = uptr->hostname;
    size_t len = strlen(host);
    const char *pct = memchr(host, '%', len);
    size_t head;
    char *encoded;

    if (pct == NULL) {
        return uptr->hostname;
    }

    /* Number of leading bytes to keep as-is, up to and including the '%'. */
    head = (size_t)(pct - host) + 1;

    /* Original length + "25" + terminating NUL. */
    encoded = apr_palloc(p, len + 3);
    memcpy(encoded, host, head);
    encoded[head] = '2';
    encoded[head + 1] = '5';
    memcpy(encoded + head + 2, host + head, len - head);
    encoded[len + 2] = '\0';

    return encoded;
}
153
154
/* Look up the default port of a URI scheme.
 *
 * scheme_str  scheme name to look up, compared case-insensitively;
 *             may be NULL
 *
 * Returns the default port from the schemes[] table, or 0 when scheme_str
 * is NULL or does not match any entry.
 */
APR_DECLARE(apr_port_t) apr_uri_port_of_scheme(const char *scheme_str)
{
    const schemes_t *entry;

    if (scheme_str == NULL) {
        return 0;
    }

    /* Linear scan; the table ends at the entry whose name is NULL. */
    for (entry = schemes; entry->name != NULL; ++entry) {
        if (strcasecmp(scheme_str, entry->name) == 0) {
            return entry->default_port;
        }
    }
    return 0;
}
167
168
/* Unparse a apr_uri_t structure to an URI string.
169
 * Optionally suppress the password for security reasons.
170
 */
171
APR_DECLARE(char *) apr_uri_unparse(apr_pool_t *p,
172
                                    const apr_uri_t *uptr,
173
                                    unsigned flags)
174
219
{
175
219
    char *ret = "";
176
177
    /* If suppressing the site part, omit both user name & scheme://hostname */
178
219
    if (!(flags & APR_URI_UNP_OMITSITEPART)) {
179
180
        /* Construct a "user:password@" string, honoring the passed
181
         * APR_URI_UNP_ flags: */
182
219
        if (uptr->user || uptr->password) {
183
12
            ret = apr_pstrcat(p,
184
12
                      (uptr->user     && !(flags & APR_URI_UNP_OMITUSER))
185
12
                          ? uptr->user : "",
186
12
                      (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD))
187
12
                          ? ":" : "",
188
12
                      (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD))
189
12
                          ? ((flags & APR_URI_UNP_REVEALPASSWORD)
190
6
                              ? uptr->password : "XXXXXXXX")
191
12
                          : "",
192
12
                      ((uptr->user     && !(flags & APR_URI_UNP_OMITUSER)) ||
193
12
                       (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD)))
194
12
                          ? "@" : "",
195
12
                      NULL);
196
12
        }
197
198
        /* Construct scheme://site string */
199
219
        if (uptr->hostname) {
200
163
            int is_default_port;
201
163
            const char *lbrk = "", *rbrk = "";
202
163
            char *host = uptr->hostname;
203
204
163
            if (strchr(host, ':')) { /* v6 literal */
205
11
                lbrk = "[";
206
11
                rbrk = "]";
207
208
11
                host = percent_encode_scope_zone_id(p, uptr);
209
11
            }
210
211
163
            is_default_port =
212
163
                (uptr->port_str == NULL ||
213
163
                 uptr->port == 0 ||
214
163
                 uptr->port == apr_uri_port_of_scheme(uptr->scheme));
215
216
163
            ret = apr_pstrcat(p, "//", ret, lbrk, host, rbrk,
217
163
                        is_default_port ? "" : ":",
218
163
                        is_default_port ? "" : uptr->port_str,
219
163
                        NULL);
220
163
        }
221
219
  if (uptr->scheme) {
222
145
      ret = apr_pstrcat(p, uptr->scheme, ":", ret, NULL);
223
145
  }
224
219
    }
225
226
    /* Should we suppress all path info? */
227
219
    if (!(flags & APR_URI_UNP_OMITPATHINFO)) {
228
        /* Append path, query and fragment strings: */
229
219
        ret = apr_pstrcat(p,
230
219
                          ret,
231
219
                          (uptr->path)
232
219
                              ? uptr->path : "",
233
219
                          (uptr->query    && !(flags & APR_URI_UNP_OMITQUERY))
234
219
                              ? "?" : "",
235
219
                          (uptr->query    && !(flags & APR_URI_UNP_OMITQUERY))
236
219
                              ? uptr->query : "",
237
219
                          (uptr->fragment && !(flags & APR_URI_UNP_OMITQUERY))
238
219
                              ? "#" : NULL,
239
219
                          (uptr->fragment && !(flags & APR_URI_UNP_OMITQUERY))
240
219
                              ? uptr->fragment : NULL,
241
219
                          NULL);
242
219
    }
243
219
    return ret;
244
219
}
245
246
/* Here is the hand-optimized parse_uri_components().  There are some wild
247
 * tricks we could pull in assembly language that we don't pull here... like we
248
 * can do word-at-time scans for delimiter characters using the same technique
249
 * that fast memchr()s use.  But that would be way non-portable. -djg
250
 */
251
252
/* We have a lookup table that we can index by character and it tells us if the
 * character is one of the interesting delimiters.  Note that we even get
 * compares for NUL for free -- it's just another delimiter.
 */
256
257
7.23k
/* Character-class flags stored in the uri_delims[] table; every class has
 * its own bit. */
#define T_SLASH           0x01        /* '/' */
#define T_QUESTION        0x02        /* '?' */
#define T_HASH            0x04        /* '#' */
#define T_ALPHA           0x08        /* 'A' ... 'Z', 'a' ... 'z' */
#define T_SCHEME          0x10        /* '0' ... '9', '-', '+', '.'
                                       * (allowed in scheme except first char)
                                       */
#define T_NUL             0x80        /* '\0' */
265
266
#if APR_CHARSET_EBCDIC
267
/* Delimiter table for the EBCDIC character set */
268
static const unsigned char uri_delims[256] = {
269
    T_NUL,                      /* 0x00     */
270
    0,                          /* 0x01     */
271
    0,                          /* 0x02     */
272
    0,                          /* 0x03     */
273
    0,                          /* 0x04     */
274
    0,                          /* 0x05     */
275
    0,                          /* 0x06     */
276
    0,                          /* 0x07     */
277
    0,                          /* 0x08     */
278
    0,                          /* 0x09     */
279
    0,                          /* 0x0a     */
280
    0,                          /* 0x0b     */
281
    0,                          /* 0x0c     */
282
    0,                          /* 0x0d     */
283
    0,                          /* 0x0e     */
284
    0,                          /* 0x0f     */
285
    0,                          /* 0x10     */
286
    0,                          /* 0x11     */
287
    0,                          /* 0x12     */
288
    0,                          /* 0x13     */
289
    0,                          /* 0x14     */
290
    0,                          /* 0x15     */
291
    0,                          /* 0x16     */
292
    0,                          /* 0x17     */
293
    0,                          /* 0x18     */
294
    0,                          /* 0x19     */
295
    0,                          /* 0x1a     */
296
    0,                          /* 0x1b     */
297
    0,                          /* 0x1c     */
298
    0,                          /* 0x1d     */
299
    0,                          /* 0x1e     */
300
    0,                          /* 0x1f     */
301
    0,                          /* 0x20     */
302
    0,                          /* 0x21     */
303
    0,                          /* 0x22     */
304
    0,                          /* 0x23     */
305
    0,                          /* 0x24     */
306
    0,                          /* 0x25     */
307
    0,                          /* 0x26     */
308
    0,                          /* 0x27     */
309
    0,                          /* 0x28     */
310
    0,                          /* 0x29     */
311
    0,                          /* 0x2a     */
312
    0,                          /* 0x2b     */
313
    0,                          /* 0x2c     */
314
    0,                          /* 0x2d     */
315
    0,                          /* 0x2e     */
316
    0,                          /* 0x2f     */
317
    0,                          /* 0x30     */
318
    0,                          /* 0x31     */
319
    0,                          /* 0x32     */
320
    0,                          /* 0x33     */
321
    0,                          /* 0x34     */
322
    0,                          /* 0x35     */
323
    0,                          /* 0x36     */
324
    0,                          /* 0x37     */
325
    0,                          /* 0x38     */
326
    0,                          /* 0x39     */
327
    0,                          /* 0x3a     */
328
    0,                          /* 0x3b     */
329
    0,                          /* 0x3c     */
330
    0,                          /* 0x3d     */
331
    0,                          /* 0x3e     */
332
    0,                          /* 0x3f     */
333
    0,                          /* 0x40 ' ' */
334
    0,                          /* 0x41     */
335
    0,                          /* 0x42     */
336
    0,                          /* 0x43     */
337
    0,                          /* 0x44     */
338
    0,                          /* 0x45     */
339
    0,                          /* 0x46     */
340
    0,                          /* 0x47     */
341
    0,                          /* 0x48     */
342
    0,                          /* 0x49     */
343
    0,                          /* 0x4a '[' */
344
    T_SCHEME,                   /* 0x4b '.' */
345
    0,                          /* 0x4c '<' */
346
    0,                          /* 0x4d '(' */
347
    T_SCHEME,                   /* 0x4e '+' */
348
    0,                          /* 0x4f '!' */
349
    0,                          /* 0x50 '&' */
350
    0,                          /* 0x51     */
351
    0,                          /* 0x52     */
352
    0,                          /* 0x53     */
353
    0,                          /* 0x54     */
354
    0,                          /* 0x55     */
355
    0,                          /* 0x56     */
356
    0,                          /* 0x57     */
357
    0,                          /* 0x58     */
358
    0,                          /* 0x59     */
359
    0,                          /* 0x5a ']' */
360
    0,                          /* 0x5b '$' */
361
    0,                          /* 0x5c '*' */
362
    0,                          /* 0x5d ')' */
363
    0,                          /* 0x5e ';' */
364
    0,                          /* 0x5f '^' */
365
    T_SCHEME,                   /* 0x60 '-' */
366
    T_SLASH,                    /* 0x61 '/' */
367
    0,                          /* 0x62     */
368
    0,                          /* 0x63     */
369
    0,                          /* 0x64     */
370
    0,                          /* 0x65     */
371
    0,                          /* 0x66     */
372
    0,                          /* 0x67     */
373
    0,                          /* 0x68     */
374
    0,                          /* 0x69     */
375
    0,                          /* 0x6a '|' */
376
    0,                          /* 0x6b ',' */
377
    0,                          /* 0x6c '%' */
378
    0,                          /* 0x6d '_' */
379
    0,                          /* 0x6e '>' */
380
    T_QUESTION,                 /* 0x6f '?' */
381
    0,                          /* 0x70     */
382
    0,                          /* 0x71     */
383
    0,                          /* 0x72     */
384
    0,                          /* 0x73     */
385
    0,                          /* 0x74     */
386
    0,                          /* 0x75     */
387
    0,                          /* 0x76     */
388
    0,                          /* 0x77     */
389
    0,                          /* 0x78     */
390
    0,                          /* 0x79 '`' */
391
    0,                          /* 0x7a ':' */
392
    T_HASH,                     /* 0x7b '#' */
393
    0,                          /* 0x7c '@' */
394
    0,                          /* 0x7d ''' */
395
    0,                          /* 0x7e '=' */
396
    0,                          /* 0x7f '"' */
397
    0,                          /* 0x80     */
398
    T_ALPHA,                    /* 0x81 'a' */
399
    T_ALPHA,                    /* 0x82 'b' */
400
    T_ALPHA,                    /* 0x83 'c' */
401
    T_ALPHA,                    /* 0x84 'd' */
402
    T_ALPHA,                    /* 0x85 'e' */
403
    T_ALPHA,                    /* 0x86 'f' */
404
    T_ALPHA,                    /* 0x87 'g' */
405
    T_ALPHA,                    /* 0x88 'h' */
406
    T_ALPHA,                    /* 0x89 'i' */
407
    0,                          /* 0x8a     */
408
    0,                          /* 0x8b     */
409
    0,                          /* 0x8c     */
410
    0,                          /* 0x8d     */
411
    0,                          /* 0x8e     */
412
    0,                          /* 0x8f     */
413
    0,                          /* 0x90     */
414
    T_ALPHA,                    /* 0x91 'j' */
415
    T_ALPHA,                    /* 0x92 'k' */
416
    T_ALPHA,                    /* 0x93 'l' */
417
    T_ALPHA,                    /* 0x94 'm' */
418
    T_ALPHA,                    /* 0x95 'n' */
419
    T_ALPHA,                    /* 0x96 'o' */
420
    T_ALPHA,                    /* 0x97 'p' */
421
    T_ALPHA,                    /* 0x98 'q' */
422
    T_ALPHA,                    /* 0x99 'r' */
423
    0,                          /* 0x9a     */
424
    0,                          /* 0x9b     */
425
    0,                          /* 0x9c     */
426
    0,                          /* 0x9d     */
427
    0,                          /* 0x9e     */
428
    0,                          /* 0x9f     */
429
    0,                          /* 0xa0     */
430
    0,                          /* 0xa1 '~' */
431
    T_ALPHA,                    /* 0xa2 's' */
432
    T_ALPHA,                    /* 0xa3 't' */
433
    T_ALPHA,                    /* 0xa4 'u' */
434
    T_ALPHA,                    /* 0xa5 'v' */
435
    T_ALPHA,                    /* 0xa6 'w' */
436
    T_ALPHA,                    /* 0xa7 'x' */
437
    T_ALPHA,                    /* 0xa8 'y' */
438
    T_ALPHA,                    /* 0xa9 'z' */
439
    0,                          /* 0xaa     */
440
    0,                          /* 0xab     */
441
    0,                          /* 0xac     */
442
    0,                          /* 0xad     */
443
    0,                          /* 0xae     */
444
    0,                          /* 0xaf     */
445
    0,                          /* 0xb0     */
446
    0,                          /* 0xb1     */
447
    0,                          /* 0xb2     */
448
    0,                          /* 0xb3     */
449
    0,                          /* 0xb4     */
450
    0,                          /* 0xb5     */
451
    0,                          /* 0xb6     */
452
    0,                          /* 0xb7     */
453
    0,                          /* 0xb8     */
454
    0,                          /* 0xb9     */
455
    0,                          /* 0xba     */
456
    0,                          /* 0xbb     */
457
    0,                          /* 0xbc     */
458
    0,                          /* 0xbd     */
459
    0,                          /* 0xbe     */
460
    0,                          /* 0xbf     */
461
    0,                          /* 0xc0 '{' */
462
    T_ALPHA,                    /* 0xc1 'A' */
463
    T_ALPHA,                    /* 0xc2 'B' */
464
    T_ALPHA,                    /* 0xc3 'C' */
465
    T_ALPHA,                    /* 0xc4 'D' */
466
    T_ALPHA,                    /* 0xc5 'E' */
467
    T_ALPHA,                    /* 0xc6 'F' */
468
    T_ALPHA,                    /* 0xc7 'G' */
469
    T_ALPHA,                    /* 0xc8 'H' */
470
    T_ALPHA,                    /* 0xc9 'I' */
471
    0,                          /* 0xca     */
472
    0,                          /* 0xcb     */
473
    0,                          /* 0xcc     */
474
    0,                          /* 0xcd     */
475
    0,                          /* 0xce     */
476
    0,                          /* 0xcf     */
477
    0,                          /* 0xd0 '}' */
478
    T_ALPHA,                    /* 0xd1 'J' */
479
    T_ALPHA,                    /* 0xd2 'K' */
480
    T_ALPHA,                    /* 0xd3 'L' */
481
    T_ALPHA,                    /* 0xd4 'M' */
482
    T_ALPHA,                    /* 0xd5 'N' */
483
    T_ALPHA,                    /* 0xd6 'O' */
484
    T_ALPHA,                    /* 0xd7 'P' */
485
    T_ALPHA,                    /* 0xd8 'Q' */
486
    T_ALPHA,                    /* 0xd9 'R' */
487
    0,                          /* 0xda     */
488
    0,                          /* 0xdb     */
489
    0,                          /* 0xdc     */
490
    0,                          /* 0xdd     */
491
    0,                          /* 0xde     */
492
    0,                          /* 0xdf     */
493
    0,                          /* 0xe0 '\' */
494
    0,                          /* 0xe1     */
495
    T_ALPHA,                    /* 0xe2 'S' */
496
    T_ALPHA,                    /* 0xe3 'T' */
497
    T_ALPHA,                    /* 0xe4 'U' */
498
    T_ALPHA,                    /* 0xe5 'V' */
499
    T_ALPHA,                    /* 0xe6 'W' */
500
    T_ALPHA,                    /* 0xe7 'X' */
501
    T_ALPHA,                    /* 0xe8 'Y' */
502
    T_ALPHA,                    /* 0xe9 'Z' */
503
    0,                          /* 0xea     */
504
    0,                          /* 0xeb     */
505
    0,                          /* 0xec     */
506
    0,                          /* 0xed     */
507
    0,                          /* 0xee     */
508
    0,                          /* 0xef     */
509
    T_SCHEME,                   /* 0xf0 '0' */
510
    T_SCHEME,                   /* 0xf1 '1' */
511
    T_SCHEME,                   /* 0xf2 '2' */
512
    T_SCHEME,                   /* 0xf3 '3' */
513
    T_SCHEME,                   /* 0xf4 '4' */
514
    T_SCHEME,                   /* 0xf5 '5' */
515
    T_SCHEME,                   /* 0xf6 '6' */
516
    T_SCHEME,                   /* 0xf7 '7' */
517
    T_SCHEME,                   /* 0xf8 '8' */
518
    T_SCHEME,                   /* 0xf9 '9' */
519
    0,                          /* 0xfa     */
520
    0,                          /* 0xfb     */
521
    0,                          /* 0xfc     */
522
    0,                          /* 0xfd     */
523
    0,                          /* 0xfe     */
524
    0                           /* 0xff     */
525
};
526
#else
527
/* Delimiter table for the ASCII character set */
528
static const unsigned char uri_delims[256] = {
529
    T_NUL,                      /* 0x00     */
530
    0,                          /* 0x01     */
531
    0,                          /* 0x02     */
532
    0,                          /* 0x03     */
533
    0,                          /* 0x04     */
534
    0,                          /* 0x05     */
535
    0,                          /* 0x06     */
536
    0,                          /* 0x07     */
537
    0,                          /* 0x08     */
538
    0,                          /* 0x09     */
539
    0,                          /* 0x0a     */
540
    0,                          /* 0x0b     */
541
    0,                          /* 0x0c     */
542
    0,                          /* 0x0d     */
543
    0,                          /* 0x0e     */
544
    0,                          /* 0x0f     */
545
    0,                          /* 0x10     */
546
    0,                          /* 0x11     */
547
    0,                          /* 0x12     */
548
    0,                          /* 0x13     */
549
    0,                          /* 0x14     */
550
    0,                          /* 0x15     */
551
    0,                          /* 0x16     */
552
    0,                          /* 0x17     */
553
    0,                          /* 0x18     */
554
    0,                          /* 0x19     */
555
    0,                          /* 0x1a     */
556
    0,                          /* 0x1b     */
557
    0,                          /* 0x1c     */
558
    0,                          /* 0x1d     */
559
    0,                          /* 0x1e     */
560
    0,                          /* 0x1f     */
561
    0,                          /* 0x20 ' ' */
562
    0,                          /* 0x21 '!' */
563
    0,                          /* 0x22 '"' */
564
    T_HASH,                     /* 0x23 '#' */
565
    0,                          /* 0x24 '$' */
566
    0,                          /* 0x25 '%' */
567
    0,                          /* 0x26 '&' */
568
    0,                          /* 0x27 ''' */
569
    0,                          /* 0x28 '(' */
570
    0,                          /* 0x29 ')' */
571
    0,                          /* 0x2a '*' */
572
    T_SCHEME,                   /* 0x2b '+' */
573
    0,                          /* 0x2c ',' */
574
    T_SCHEME,                   /* 0x2d '-' */
575
    T_SCHEME,                   /* 0x2e '.' */
576
    T_SLASH,                    /* 0x2f '/' */
577
    T_SCHEME,                   /* 0x30 '0' */
578
    T_SCHEME,                   /* 0x31 '1' */
579
    T_SCHEME,                   /* 0x32 '2' */
580
    T_SCHEME,                   /* 0x33 '3' */
581
    T_SCHEME,                   /* 0x34 '4' */
582
    T_SCHEME,                   /* 0x35 '5' */
583
    T_SCHEME,                   /* 0x36 '6' */
584
    T_SCHEME,                   /* 0x37 '7' */
585
    T_SCHEME,                   /* 0x38 '8' */
586
    T_SCHEME,                   /* 0x39 '9' */
587
    0,                          /* 0x3a ':' */
588
    0,                          /* 0x3b ';' */
589
    0,                          /* 0x3c '<' */
590
    0,                          /* 0x3d '=' */
591
    0,                          /* 0x3e '>' */
592
    T_QUESTION,                 /* 0x3f '?' */
593
    0,                          /* 0x40 '@' */
594
    T_ALPHA,                    /* 0x41 'A' */
595
    T_ALPHA,                    /* 0x42 'B' */
596
    T_ALPHA,                    /* 0x43 'C' */
597
    T_ALPHA,                    /* 0x44 'D' */
598
    T_ALPHA,                    /* 0x45 'E' */
599
    T_ALPHA,                    /* 0x46 'F' */
600
    T_ALPHA,                    /* 0x47 'G' */
601
    T_ALPHA,                    /* 0x48 'H' */
602
    T_ALPHA,                    /* 0x49 'I' */
603
    T_ALPHA,                    /* 0x4a 'J' */
604
    T_ALPHA,                    /* 0x4b 'K' */
605
    T_ALPHA,                    /* 0x4c 'L' */
606
    T_ALPHA,                    /* 0x4d 'M' */
607
    T_ALPHA,                    /* 0x4e 'N' */
608
    T_ALPHA,                    /* 0x4f 'O' */
609
    T_ALPHA,                    /* 0x50 'P' */
610
    T_ALPHA,                    /* 0x51 'Q' */
611
    T_ALPHA,                    /* 0x52 'R' */
612
    T_ALPHA,                    /* 0x53 'S' */
613
    T_ALPHA,                    /* 0x54 'T' */
614
    T_ALPHA,                    /* 0x55 'U' */
615
    T_ALPHA,                    /* 0x56 'V' */
616
    T_ALPHA,                    /* 0x57 'W' */
617
    T_ALPHA,                    /* 0x58 'X' */
618
    T_ALPHA,                    /* 0x59 'Y' */
619
    T_ALPHA,                    /* 0x5a 'Z' */
620
    0,                          /* 0x5b '[' */
621
    0,                          /* 0x5c '\' */
622
    0,                          /* 0x5d ']' */
623
    0,                          /* 0x5e '^' */
624
    0,                          /* 0x5f '_' */
625
    0,                          /* 0x60 '`' */
626
    T_ALPHA,                    /* 0x61 'a' */
627
    T_ALPHA,                    /* 0x62 'b' */
628
    T_ALPHA,                    /* 0x63 'c' */
629
    T_ALPHA,                    /* 0x64 'd' */
630
    T_ALPHA,                    /* 0x65 'e' */
631
    T_ALPHA,                    /* 0x66 'f' */
632
    T_ALPHA,                    /* 0x67 'g' */
633
    T_ALPHA,                    /* 0x68 'h' */
634
    T_ALPHA,                    /* 0x69 'i' */
635
    T_ALPHA,                    /* 0x6a 'j' */
636
    T_ALPHA,                    /* 0x6b 'k' */
637
    T_ALPHA,                    /* 0x6c 'l' */
638
    T_ALPHA,                    /* 0x6d 'm' */
639
    T_ALPHA,                    /* 0x6e 'n' */
640
    T_ALPHA,                    /* 0x6f 'o' */
641
    T_ALPHA,                    /* 0x70 'p' */
642
    T_ALPHA,                    /* 0x71 'q' */
643
    T_ALPHA,                    /* 0x72 'r' */
644
    T_ALPHA,                    /* 0x73 's' */
645
    T_ALPHA,                    /* 0x74 't' */
646
    T_ALPHA,                    /* 0x75 'u' */
647
    T_ALPHA,                    /* 0x76 'v' */
648
    T_ALPHA,                    /* 0x77 'w' */
649
    T_ALPHA,                    /* 0x78 'x' */
650
    T_ALPHA,                    /* 0x79 'y' */
651
    T_ALPHA,                    /* 0x7a 'z' */
652
    0,                          /* 0x7b '{' */
653
    0,                          /* 0x7c '|' */
654
    0,                          /* 0x7d '}' */
655
    0,                          /* 0x7e '~' */
656
    0,                          /* 0x7f     */
657
    0,                          /* 0x80     */
658
    0,                          /* 0x81     */
659
    0,                          /* 0x82     */
660
    0,                          /* 0x83     */
661
    0,                          /* 0x84     */
662
    0,                          /* 0x85     */
663
    0,                          /* 0x86     */
664
    0,                          /* 0x87     */
665
    0,                          /* 0x88     */
666
    0,                          /* 0x89     */
667
    0,                          /* 0x8a     */
668
    0,                          /* 0x8b     */
669
    0,                          /* 0x8c     */
670
    0,                          /* 0x8d     */
671
    0,                          /* 0x8e     */
672
    0,                          /* 0x8f     */
673
    0,                          /* 0x90     */
674
    0,                          /* 0x91     */
675
    0,                          /* 0x92     */
676
    0,                          /* 0x93     */
677
    0,                          /* 0x94     */
678
    0,                          /* 0x95     */
679
    0,                          /* 0x96     */
680
    0,                          /* 0x97     */
681
    0,                          /* 0x98     */
682
    0,                          /* 0x99     */
683
    0,                          /* 0x9a     */
684
    0,                          /* 0x9b     */
685
    0,                          /* 0x9c     */
686
    0,                          /* 0x9d     */
687
    0,                          /* 0x9e     */
688
    0,                          /* 0x9f     */
689
    0,                          /* 0xa0     */
690
    0,                          /* 0xa1     */
691
    0,                          /* 0xa2     */
692
    0,                          /* 0xa3     */
693
    0,                          /* 0xa4     */
694
    0,                          /* 0xa5     */
695
    0,                          /* 0xa6     */
696
    0,                          /* 0xa7     */
697
    0,                          /* 0xa8     */
698
    0,                          /* 0xa9     */
699
    0,                          /* 0xaa     */
700
    0,                          /* 0xab     */
701
    0,                          /* 0xac     */
702
    0,                          /* 0xad     */
703
    0,                          /* 0xae     */
704
    0,                          /* 0xaf     */
705
    0,                          /* 0xb0     */
706
    0,                          /* 0xb1     */
707
    0,                          /* 0xb2     */
708
    0,                          /* 0xb3     */
709
    0,                          /* 0xb4     */
710
    0,                          /* 0xb5     */
711
    0,                          /* 0xb6     */
712
    0,                          /* 0xb7     */
713
    0,                          /* 0xb8     */
714
    0,                          /* 0xb9     */
715
    0,                          /* 0xba     */
716
    0,                          /* 0xbb     */
717
    0,                          /* 0xbc     */
718
    0,                          /* 0xbd     */
719
    0,                          /* 0xbe     */
720
    0,                          /* 0xbf     */
721
    0,                          /* 0xc0     */
722
    0,                          /* 0xc1     */
723
    0,                          /* 0xc2     */
724
    0,                          /* 0xc3     */
725
    0,                          /* 0xc4     */
726
    0,                          /* 0xc5     */
727
    0,                          /* 0xc6     */
728
    0,                          /* 0xc7     */
729
    0,                          /* 0xc8     */
730
    0,                          /* 0xc9     */
731
    0,                          /* 0xca     */
732
    0,                          /* 0xcb     */
733
    0,                          /* 0xcc     */
734
    0,                          /* 0xcd     */
735
    0,                          /* 0xce     */
736
    0,                          /* 0xcf     */
737
    0,                          /* 0xd0     */
738
    0,                          /* 0xd1     */
739
    0,                          /* 0xd2     */
740
    0,                          /* 0xd3     */
741
    0,                          /* 0xd4     */
742
    0,                          /* 0xd5     */
743
    0,                          /* 0xd6     */
744
    0,                          /* 0xd7     */
745
    0,                          /* 0xd8     */
746
    0,                          /* 0xd9     */
747
    0,                          /* 0xda     */
748
    0,                          /* 0xdb     */
749
    0,                          /* 0xdc     */
750
    0,                          /* 0xdd     */
751
    0,                          /* 0xde     */
752
    0,                          /* 0xdf     */
753
    0,                          /* 0xe0     */
754
    0,                          /* 0xe1     */
755
    0,                          /* 0xe2     */
756
    0,                          /* 0xe3     */
757
    0,                          /* 0xe4     */
758
    0,                          /* 0xe5     */
759
    0,                          /* 0xe6     */
760
    0,                          /* 0xe7     */
761
    0,                          /* 0xe8     */
762
    0,                          /* 0xe9     */
763
    0,                          /* 0xea     */
764
    0,                          /* 0xeb     */
765
    0,                          /* 0xec     */
766
    0,                          /* 0xed     */
767
    0,                          /* 0xee     */
768
    0,                          /* 0xef     */
769
    0,                          /* 0xf0     */
770
    0,                          /* 0xf1     */
771
    0,                          /* 0xf2     */
772
    0,                          /* 0xf3     */
773
    0,                          /* 0xf4     */
774
    0,                          /* 0xf5     */
775
    0,                          /* 0xf6     */
776
    0,                          /* 0xf7     */
777
    0,                          /* 0xf8     */
778
    0,                          /* 0xf9     */
779
    0,                          /* 0xfa     */
780
    0,                          /* 0xfb     */
781
    0,                          /* 0xfc     */
782
    0,                          /* 0xfd     */
783
    0,                          /* 0xfe     */
784
    0                           /* 0xff     */
785
};
786
#endif
787
788
789
/* it works like this:
790
    if ((uri_delims[ch] & NOTEND_foobar) == 0) {
791
        then we're not at a delimiter for foobar
792
    }
793
*/
794
795
7.23k
/* Despite the NOTEND_ prefix, each mask lists the character classes that
 * terminate the component: the parser keeps scanning while
 * (uri_delims[ch] & NOTEND_x) == 0 and stops at the first character whose
 * class is in the mask.
 */
#define NOTEND_HOSTINFO   (T_SLASH | T_QUESTION | T_HASH | T_NUL)
796
7.13k
#define NOTEND_PATH       (T_QUESTION | T_HASH | T_NUL)
797
798
/* parse_uri_components():
799
 * Parse a given URI, fill in all supplied fields of a uri_components
800
 * structure. This eliminates the necessity of extracting host, port,
801
 * path, query info repeatedly in the modules.
802
 * Side effects:
803
 *  - fills in fields of uri_components *uptr
804
 *  - none on any of the r->* fields
805
 */
806
APR_DECLARE(apr_status_t) apr_uri_parse(apr_pool_t *p, const char *uri,
807
                                        apr_uri_t *uptr)
808
601
{
809
601
    const char *s;
810
601
    const char *s1;
811
601
    const char *hostinfo;
812
601
    char *endstr;
813
601
    int port;
814
601
    int v6_offset1 = 0, v6_offset2 = 0;
815
601
    int have_zone_id = 0;
816
817
    /* Initialize the structure. parse_uri() and parse_uri_components()
818
     * can be called more than once per request.
819
     */
820
601
    memset (uptr, '\0', sizeof(*uptr));
821
601
    uptr->is_initialized = 1;
822
823
    /* We assume the processor has a branch predictor like most --
824
     * it assumes forward branches are untaken and backwards are taken.  That's
825
     * the reason for the gotos.  -djg
826
     */
827
601
    if (uri[0] == '/') {
828
        /* RFC2396 #4.3 says that two leading slashes mean we have an
829
         * authority component, not a path!  Fixing this looks scary
830
         * with the gotos here.  But if the existing logic is valid,
831
         * then presumably a goto pointing to deal_with_authority works.
832
         *
833
         * RFC2396 describes this as resolving an ambiguity.  In the
834
         * case of three or more slashes there would seem to be no
835
         * ambiguity, so it is a path after all.
836
         */
837
90
        if (uri[1] == '/' && uri[2] != '/') {
838
48
            s = uri + 2 ;
839
48
            goto deal_with_authority ;
840
48
        }
841
842
506
deal_with_path:
843
        /* we expect uri to point to first character of path ... remember
844
         * that the path could be empty -- http://foobar?query for example
845
         */
846
506
        s = uri;
847
7.13k
        while ((uri_delims[*(unsigned char *)s] & NOTEND_PATH) == 0) {
848
6.62k
            ++s;
849
6.62k
        }
850
506
        if (s != uri) {
851
366
            uptr->path = apr_pstrmemdup(p, uri, s - uri);
852
366
        }
853
506
        if (*s == 0) {
854
437
            return APR_SUCCESS;
855
437
        }
856
69
        if (*s == '?') {
857
40
            ++s;
858
40
            s1 = strchr(s, '#');
859
40
            if (s1) {
860
16
                uptr->fragment = apr_pstrdup(p, s1 + 1);
861
16
                uptr->query = apr_pstrmemdup(p, s, s1 - s);
862
16
            }
863
24
            else {
864
24
                uptr->query = apr_pstrdup(p, s);
865
24
            }
866
40
            return APR_SUCCESS;
867
40
        }
868
        /* otherwise it's a fragment */
869
29
        uptr->fragment = apr_pstrdup(p, s + 1);
870
29
        return APR_SUCCESS;
871
69
    }
872
873
    /* find the scheme: */
874
511
    s = uri;
875
    /* first char must be letter */
876
511
    if (uri_delims[*(unsigned char *)s] & T_ALPHA) {
877
437
        ++s;
878
5.49k
        while ((uri_delims[*(unsigned char *)s] & (T_ALPHA|T_SCHEME)))
879
5.06k
            ++s;
880
437
    }
881
    /* scheme must be non-empty and followed by : */
882
511
    if (s != uri && s[0] == ':') {
883
370
        uptr->scheme = apr_pstrmemdup(p, uri, s - uri);
884
370
        s++;
885
370
    }
886
141
    else {
887
        /* No valid scheme, restart from the beginning */
888
141
        s = uri;
889
141
    }
890
891
511
    if (s[0] != '/' || s[1] != '/') {
892
197
        if (uri == s) {
893
            /*
894
             * RFC 3986 3.3: If we have no scheme and no authority,
895
             * the leading segment of a relative path must not contain a ':'.
896
             */
897
141
            char *first_slash = strchr(uri, '/');
898
141
            if (first_slash) {
899
1.39k
                while (s < first_slash) {
900
1.34k
                    if (s[0] == ':')
901
37
                        return APR_EGENERAL;
902
1.30k
                    ++s;
903
1.30k
                }
904
                /* no scheme but relative path, e.g. '../image.jpg' */
905
87
            }
906
54
            else {
907
54
                if (strchr(uri, ':') != NULL)
908
12
                    return APR_EGENERAL;
909
                /* no scheme, no slash, but relative path, e.g. 'image.jpg' */
910
54
            }
911
92
            goto deal_with_path;
912
141
        }
913
        /* scheme and relative path */
914
56
        uri = s;
915
56
        goto deal_with_path;
916
197
    }
917
918
314
    s += 2;
919
920
362
deal_with_authority:
921
362
    hostinfo = s;
922
7.23k
    while ((uri_delims[*(unsigned char *)s] & NOTEND_HOSTINFO) == 0) {
923
6.87k
        ++s;
924
6.87k
    }
925
362
    uri = s;        /* whatever follows hostinfo is start of uri */
926
362
    uptr->hostinfo = apr_pstrmemdup(p, hostinfo, uri - hostinfo);
927
928
    /* If there's a username:password@host:port, the @ we want is the last @...
929
     * too bad there's no memrchr()... For the C purists, note that hostinfo
930
     * is definitely not the first character of the original uri so therefore
931
     * &hostinfo[-1] < &hostinfo[0] ... and this loop is valid C.
932
     */
933
6.37k
    do {
934
6.37k
        --s;
935
6.37k
    } while (s >= hostinfo && *s != '@');
936
362
    if (s < hostinfo) {
937
        /* again we want the common case to be fall through */
938
362
deal_with_host:
939
        /* We expect hostinfo to point to the first character of
940
         * the hostname.  If there's a port it is the first colon,
941
         * except with IPv6.
942
         *
943
         * IPv6 also has the interesting property (RFC6874) that it may contain
944
         * a percent-encoded percent delimiting the zone identifier. We need to
945
         * unescape that.
946
         */
947
362
        if (*hostinfo == '[') {
948
            /* zone identifier */
949
92
            apr_status_t err = detect_scope_zone_id(&have_zone_id, hostinfo,
950
92
                                                    uri - hostinfo);
951
            /* FIXME: Ignore APR_EINVAL (invalid escaped character) for now as
952
             * old code may rely on it silently getting ignored?
953
             */
954
92
            if ((err != APR_SUCCESS) && (err != APR_EINVAL)) {
955
0
                return err;
956
0
            }
957
958
            /* Port */
959
92
            v6_offset1 = 1;
960
92
            v6_offset2 = 2;
961
92
            s = memchr(hostinfo, ']', uri - hostinfo);
962
92
            if (s == NULL) {
963
24
                return APR_EGENERAL;
964
24
            }
965
68
            if (*++s != ':') {
966
49
                s = NULL; /* no port */
967
49
            }
968
68
        }
969
270
        else {
970
270
            s = memchr(hostinfo, ':', uri - hostinfo);
971
270
        }
972
338
        if (s == NULL) {
973
            /* we expect the common case to have no port */
974
83
            uptr->hostname = apr_pstrmemdup(p,
975
83
                                            hostinfo + v6_offset1,
976
83
                                            uri - hostinfo - v6_offset2);
977
83
            if (have_zone_id) {
978
2
                percent_decode_scope_zone_id(uptr->hostname);
979
2
            }
980
83
            goto deal_with_path;
981
83
        }
982
255
        uptr->hostname = apr_pstrmemdup(p,
983
255
                                        hostinfo + v6_offset1,
984
255
                                        s - hostinfo - v6_offset2);
985
255
        if (have_zone_id) {
986
3
            percent_decode_scope_zone_id(uptr->hostname);
987
3
        }
988
255
        ++s;
989
255
        uptr->port_str = apr_pstrmemdup(p, s, uri - s);
990
255
        if (uri != s) {
991
91
            port = strtol(uptr->port_str, &endstr, 10);
992
91
            uptr->port = port;
993
91
            if (*endstr == '\0') {
994
69
                goto deal_with_path;
995
69
            }
996
            /* Invalid characters after ':' found */
997
22
            return APR_EGENERAL;
998
91
        }
999
164
        uptr->port = apr_uri_port_of_scheme(uptr->scheme);
1000
164
        goto deal_with_path;
1001
255
    }
1002
1003
    /* first colon delimits username:password */
1004
63
    s1 = memchr(hostinfo, ':', s - hostinfo);
1005
63
    if (s1) {
1006
32
        uptr->user = apr_pstrmemdup(p, hostinfo, s1 - hostinfo);
1007
32
        ++s1;
1008
32
        uptr->password = apr_pstrmemdup(p, s1, s - s1);
1009
32
    }
1010
31
    else {
1011
31
        uptr->user = apr_pstrmemdup(p, hostinfo, s - hostinfo);
1012
31
    }
1013
63
    hostinfo = s + 1;
1014
63
    goto deal_with_host;
1015
362
}
1016
1017
/* Special case for CONNECT parsing: it comes with the hostinfo part only */
1018
/* See the INTERNET-DRAFT document "Tunneling SSL Through a WWW Proxy"
1019
 * currently at http://www.mcom.com/newsref/std/tunneling_ssl.html
1020
 * for the format of the "CONNECT host:port HTTP/1.0" request
1021
 */
1022
APR_DECLARE(apr_status_t) apr_uri_parse_hostinfo(apr_pool_t *p,
1023
                                                 const char *hostinfo,
1024
                                                 apr_uri_t *uptr)
1025
316
{
1026
316
    const char *s;
1027
316
    char *endstr;
1028
316
    const char *rsb;
1029
316
    int v6_offset1 = 0;
1030
1031
    /* Initialize the structure. parse_uri() and parse_uri_components()
1032
     * can be called more than once per request.
1033
     */
1034
316
    memset(uptr, '\0', sizeof(*uptr));
1035
316
    uptr->is_initialized = 1;
1036
316
    uptr->hostinfo = apr_pstrdup(p, hostinfo);
1037
1038
    /* We expect hostinfo to point to the first character of
1039
     * the hostname.  There must be a port, separated by a colon
1040
     */
1041
316
    if (*hostinfo == '[') {
1042
30
        if ((rsb = strchr(hostinfo, ']')) == NULL ||
1043
30
            *(rsb + 1) != ':') {
1044
25
            return APR_EGENERAL;
1045
25
        }
1046
        /* literal IPv6 address */
1047
5
        s = rsb + 1;
1048
5
        ++hostinfo;
1049
5
        v6_offset1 = 1;
1050
5
    }
1051
286
    else {
1052
286
        s = strchr(hostinfo, ':');
1053
286
    }
1054
291
    if (s == NULL) {
1055
30
        return APR_EGENERAL;
1056
30
    }
1057
261
    uptr->hostname = apr_pstrndup(p, hostinfo, s - hostinfo - v6_offset1);
1058
1059
    /* Again, ensure zone IDs are decoded. */
1060
261
    int have_zone_id = 0;
1061
261
    apr_status_t err = detect_scope_zone_id(&have_zone_id, uptr->hostname,
1062
261
                                            strlen(uptr->hostname));
1063
    /* FIXME: Ignore APR_EINVAL (invalid escaped character) for now as old code
1064
     * may rely on it silently getting ignored?
1065
     */
1066
261
    if ((err != APR_SUCCESS) && (err != APR_EINVAL)) {
1067
0
        return err;
1068
0
    }
1069
261
    if (have_zone_id) {
1070
7
        percent_decode_scope_zone_id(uptr->hostname);
1071
7
    }
1072
1073
261
    ++s;
1074
261
    uptr->port_str = apr_pstrdup(p, s);
1075
261
    if (*s != '\0') {
1076
249
        uptr->port = (unsigned short) strtol(uptr->port_str, &endstr, 10);
1077
249
        if (*endstr == '\0') {
1078
12
            return APR_SUCCESS;
1079
12
        }
1080
        /* Invalid characters after ':' found */
1081
249
    }
1082
249
    return APR_EGENERAL;
1083
261
}