Coverage Report

Created: 2025-10-10 06:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/httpd/srclib/apr/uri/apr_uri.c
Line
Count
Source
1
/* Licensed to the Apache Software Foundation (ASF) under one or more
2
 * contributor license agreements.  See the NOTICE file distributed with
3
 * this work for additional information regarding copyright ownership.
4
 * The ASF licenses this file to You under the Apache License, Version 2.0
5
 * (the "License"); you may not use this file except in compliance with
6
 * the License.  You may obtain a copy of the License at
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
/*
18
 * apr_uri.c: URI related utility things
19
 *
20
 */
21
22
#include <stdlib.h>
23
24
#include "apu.h"
25
#include "apr.h"
26
#include "apr_general.h"
27
#include "apr_strings.h"
28
29
#define APR_WANT_STRFUNC
30
#include "apr_want.h"
31
32
#include "apr_uri.h"
33
34
typedef struct schemes_t schemes_t;
35
36
/** Structure to store various schemes and their default ports */
37
struct schemes_t {
38
    /** The name of the scheme */
39
    const char *name;
40
    /** The default port for the scheme */
41
    apr_port_t default_port;
42
};
43
44
/* Some WWW schemes and their default ports; this is basically /etc/services */
45
/* This will become global when the protocol abstraction comes */
46
/* As the schemes are searched by a linear search, */
47
/* they are sorted by their expected frequency */
48
static schemes_t schemes[] =
49
{
50
    {"http",     APR_URI_HTTP_DEFAULT_PORT},
51
    {"ftp",      APR_URI_FTP_DEFAULT_PORT},
52
    {"https",    APR_URI_HTTPS_DEFAULT_PORT},
53
    {"gopher",   APR_URI_GOPHER_DEFAULT_PORT},
54
    {"ldap",     APR_URI_LDAP_DEFAULT_PORT},
55
    {"nntp",     APR_URI_NNTP_DEFAULT_PORT},
56
    {"snews",    APR_URI_SNEWS_DEFAULT_PORT},
57
    {"imap",     APR_URI_IMAP_DEFAULT_PORT},
58
    {"pop",      APR_URI_POP_DEFAULT_PORT},
59
    {"sip",      APR_URI_SIP_DEFAULT_PORT},
60
    {"rtsp",     APR_URI_RTSP_DEFAULT_PORT},
61
    {"wais",     APR_URI_WAIS_DEFAULT_PORT},
62
    {"z39.50r",  APR_URI_WAIS_DEFAULT_PORT},
63
    {"z39.50s",  APR_URI_WAIS_DEFAULT_PORT},
64
    {"prospero", APR_URI_PROSPERO_DEFAULT_PORT},
65
    {"nfs",      APR_URI_NFS_DEFAULT_PORT},
66
    {"tip",      APR_URI_TIP_DEFAULT_PORT},
67
    {"acap",     APR_URI_ACAP_DEFAULT_PORT},
68
    {"telnet",   APR_URI_TELNET_DEFAULT_PORT},
69
    {"ssh",      APR_URI_SSH_DEFAULT_PORT},
70
    { NULL, 0xFFFF }     /* unknown port */
71
};
72
73
345
/*
 * Non-zero when the NUL-terminated text `addr` starts like a link-local IPv6
 * address: at least five characters, a four-character first group ending in
 * ':', beginning with "fe" (any case), followed by one of 8, 9, a, b and then
 * any hex digit.
 *
 * The length test comes first so that the indexed reads of addr[2..4] never
 * go past the terminator. `addr` is evaluated several times; do not pass an
 * expression with side effects.
 */
#define LINK_LOCAL(addr) \
    (strlen(addr) >= 5 \
     && (addr)[4] == ':' \
     && strncasecmp((addr), "fe", 2) == 0 \
     && strchr("89aAbB", (addr)[2]) != NULL \
     && strchr("0123456789aAbBcCdDeEfF", (addr)[3]) != NULL)
78
79
/*
80
 * *only* for IPv6 addresses with a zone identifier according to RFC6874
81
 */
82
static apr_status_t detect_scope_zone_id(int *have_zone_id, char const *ipv6addr,
83
                                         size_t len)
84
418
{
85
418
    char *s;
86
87
418
    *have_zone_id = 0;
88
89
418
    if (len < 3 + 5) {
90
        /*
91
         * We neeed *at least* the three characters for a percent-encoded
92
         * percent sign. Furthermore scope id's are only allowed for link-local
93
         * addresses under prefix fe80::/10.
94
         */
95
130
        return APR_SUCCESS;
96
130
    }
97
98
288
    if (!LINK_LOCAL(ipv6addr)) {
99
        /*
100
         * Scope id's are only allowed for link-local addresses under prefix
101
         * fe80::/10.
102
         */
103
228
        return APR_SUCCESS;
104
228
    }
105
106
60
    s = memchr(ipv6addr, '%', len);
107
60
    if (s != NULL && s < ipv6addr + len - 2) {
108
        /* RFC3986 is pretty specific about how to percent encode, but
109
         * decoding is to be performed per component, which is what we
110
         * already have here. On the other hand, RFC6874 is clear that
111
         * the delimiter for a zone identifier must be a percent encoded
112
         * percent, i.e. "%25". Any other percent-encoded character is
113
         * invalid here.
114
         */
115
53
        if (s[1] != '2' || s[2] != '5') {
116
44
            return APR_EINVAL;
117
44
        }
118
9
        *have_zone_id = 1;
119
9
    }
120
16
    return APR_SUCCESS;
121
60
}
122
123
/*
 * Turn the "%25" that introduces a zone identifier back into a bare '%',
 * editing `hostname` (a NUL-terminated, writable string) in place.
 *
 * Only the first '%' is touched: the two characters following it are removed
 * and the rest of the string, including the terminator, is shifted left.
 *
 * RFC6874 is a little hand-wavy in terms of what to decode. Technically, all
 * percent-encoded characters should be decoded, but the RFC also states that
 * they SHOULD not occur, basically. So we assume they don't, to keep things
 * simple; otherwise we would have to deal with full RFC3986 rules and perform
 * UTF-8 decoding as well.
 *
 * The caller is expected to have verified that the first '%' really starts
 * "%25"; the two removed characters are not inspected here. If the string
 * has no '%' at all, or fewer than two characters follow it, the string is
 * left untouched rather than reading or writing out of bounds.
 */
static void percent_decode_scope_zone_id(char *hostname)
{
    size_t len = strlen(hostname);
    char *s = memchr(hostname, '%', len);
    size_t offset;

    if (s == NULL) {
        /* Nothing to decode; avoid arithmetic on a NULL pointer. */
        return;
    }

    offset = (size_t)(s - hostname);
    if (len - offset < 3) {
        /* '%' without two following characters: length below would wrap. */
        return;
    }

    /* Drop the two characters after '%'; count includes the trailing NUL. */
    memmove(s + 1, s + 3, len - offset - 2);
}
142
143
/*
 * Inverse of percent_decode_scope_zone_id(): produce the host text with the
 * first '%' written as "%25".
 *
 * If uptr->hostname contains no '%', or is not a link-local address (scope
 * ids are only allowed under prefix fe80::/10), the original hostname
 * pointer is returned unchanged. Otherwise a new string two characters
 * longer is allocated from pool `p` and returned; uptr is not modified.
 */
static char *percent_encode_scope_zone_id(apr_pool_t *p, apr_uri_t const *uptr)
{
    const char *src = uptr->hostname;
    size_t srclen = strlen(src);
    const char *pct = memchr(src, '%', srclen);
    size_t head;
    char *out;

    if (pct == NULL || !LINK_LOCAL(src)) {
        return uptr->hostname;
    }

    /* Number of leading characters up to and including the '%'. */
    head = (size_t)(pct - src) + 1;

    /* Original text + "25" + terminating NUL. */
    out = apr_palloc(p, srclen + 3);

    memcpy(out, src, head);
    memcpy(out + head, "25", 2);
    /* Remainder after the '%', copied together with its terminator. */
    memcpy(out + head + 2, src + head, srclen - head + 1);

    return out;
}
173
174
/*
 * Look up the default port for a scheme name.
 *
 * The name is compared case-insensitively against each entry of the schemes
 * table, in table order. Returns the matching entry's default port, or 0 if
 * scheme_str is NULL or names a scheme that is not in the table (the
 * terminating entry's port value is never returned).
 */
APR_DECLARE(apr_port_t) apr_uri_port_of_scheme(const char *scheme_str)
{
    const schemes_t *entry;

    if (scheme_str == NULL) {
        return 0;
    }

    entry = schemes;
    while (entry->name != NULL) {
        if (strcasecmp(scheme_str, entry->name) == 0) {
            return entry->default_port;
        }
        entry++;
    }

    return 0;
}
187
188
/* Unparse a apr_uri_t structure to an URI string.
189
 * Optionally suppress the password for security reasons.
190
 */
191
APR_DECLARE(char *) apr_uri_unparse(apr_pool_t *p,
192
                                    const apr_uri_t *uptr,
193
                                    unsigned flags)
194
240
{
195
240
    char *ret = "";
196
197
    /* If suppressing the site part, omit both user name & scheme://hostname */
198
240
    if (!(flags & APR_URI_UNP_OMITSITEPART)) {
199
200
        /* Construct a "user:password@" string, honoring the passed
201
         * APR_URI_UNP_ flags: */
202
240
        if (uptr->user || uptr->password) {
203
33
            ret = apr_pstrcat(p,
204
33
                      (uptr->user     && !(flags & APR_URI_UNP_OMITUSER))
205
33
                          ? uptr->user : "",
206
33
                      (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD))
207
33
                          ? ":" : "",
208
33
                      (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD))
209
33
                          ? ((flags & APR_URI_UNP_REVEALPASSWORD)
210
8
                              ? uptr->password : "XXXXXXXX")
211
33
                          : "",
212
33
                      ((uptr->user     && !(flags & APR_URI_UNP_OMITUSER)) ||
213
0
                       (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD)))
214
33
                          ? "@" : "",
215
33
                      NULL);
216
33
        }
217
218
        /* Construct scheme://site string */
219
240
        if (uptr->hostname) {
220
196
            int is_default_port;
221
196
            const char *lbrk = "", *rbrk = "";
222
196
            char *host = uptr->hostname;
223
224
196
            if (strchr(host, ':')) { /* v6 literal */
225
61
                lbrk = "[";
226
61
                rbrk = "]";
227
228
61
                host = percent_encode_scope_zone_id(p, uptr);
229
61
            }
230
231
196
            is_default_port =
232
196
                (uptr->port_str == NULL ||
233
145
                 uptr->port == 0 ||
234
94
                 uptr->port == apr_uri_port_of_scheme(uptr->scheme));
235
236
196
            ret = apr_pstrcat(p, "//", ret, lbrk, host, rbrk,
237
196
                        is_default_port ? "" : ":",
238
196
                        is_default_port ? "" : uptr->port_str,
239
196
                        NULL);
240
196
        }
241
240
  if (uptr->scheme) {
242
173
      ret = apr_pstrcat(p, uptr->scheme, ":", ret, NULL);
243
173
  }
244
240
    }
245
246
    /* Should we suppress all path info? */
247
240
    if (!(flags & APR_URI_UNP_OMITPATHINFO)) {
248
        /* Append path, query and fragment strings: */
249
240
        ret = apr_pstrcat(p,
250
240
                          ret,
251
240
                          (uptr->path)
252
240
                              ? uptr->path : "",
253
240
                          (uptr->query    && !(flags & APR_URI_UNP_OMITQUERY))
254
240
                              ? "?" : "",
255
240
                          (uptr->query    && !(flags & APR_URI_UNP_OMITQUERY))
256
240
                              ? uptr->query : "",
257
240
                          (uptr->fragment && !(flags & APR_URI_UNP_OMITQUERY))
258
240
                              ? "#" : NULL,
259
240
                          (uptr->fragment && !(flags & APR_URI_UNP_OMITQUERY))
260
240
                              ? uptr->fragment : NULL,
261
240
                          NULL);
262
240
    }
263
240
    return ret;
264
240
}
265
266
/* Here is the hand-optimized parse_uri_components().  There are some wild
267
 * tricks we could pull in assembly language that we don't pull here... like we
268
 * can do word-at-time scans for delimiter characters using the same technique
269
 * that fast memchr()s use.  But that would be way non-portable. -djg
270
 */
271
272
/* We have a lookup table that we can index by character and it tells us if the
273
 * character is one of the interesting delimiters.  Note that we even get
274
 * compares for NUL for free -- it's just another delimiter.
275
 */
276
277
9.97k
/* Character-class bits stored in the delimiter table; several may be OR-ed
 * together when testing a character. */
#define T_SLASH     0x01    /* '/' */
#define T_QUESTION  0x02    /* '?' */
#define T_HASH      0x04    /* '#' */
#define T_ALPHA     0x08    /* 'A' ... 'Z', 'a' ... 'z' */
#define T_SCHEME    0x10    /* '0' ... '9', '-', '+', '.'
                             * (allowed in scheme except first char) */
#define T_NUL       0x80    /* '\0' */
285
286
#if APR_CHARSET_EBCDIC
287
/* Delimiter table for the EBCDIC character set */
288
static const unsigned char uri_delims[256] = {
289
    T_NUL,                      /* 0x00     */
290
    0,                          /* 0x01     */
291
    0,                          /* 0x02     */
292
    0,                          /* 0x03     */
293
    0,                          /* 0x04     */
294
    0,                          /* 0x05     */
295
    0,                          /* 0x06     */
296
    0,                          /* 0x07     */
297
    0,                          /* 0x08     */
298
    0,                          /* 0x09     */
299
    0,                          /* 0x0a     */
300
    0,                          /* 0x0b     */
301
    0,                          /* 0x0c     */
302
    0,                          /* 0x0d     */
303
    0,                          /* 0x0e     */
304
    0,                          /* 0x0f     */
305
    0,                          /* 0x10     */
306
    0,                          /* 0x11     */
307
    0,                          /* 0x12     */
308
    0,                          /* 0x13     */
309
    0,                          /* 0x14     */
310
    0,                          /* 0x15     */
311
    0,                          /* 0x16     */
312
    0,                          /* 0x17     */
313
    0,                          /* 0x18     */
314
    0,                          /* 0x19     */
315
    0,                          /* 0x1a     */
316
    0,                          /* 0x1b     */
317
    0,                          /* 0x1c     */
318
    0,                          /* 0x1d     */
319
    0,                          /* 0x1e     */
320
    0,                          /* 0x1f     */
321
    0,                          /* 0x20     */
322
    0,                          /* 0x21     */
323
    0,                          /* 0x22     */
324
    0,                          /* 0x23     */
325
    0,                          /* 0x24     */
326
    0,                          /* 0x25     */
327
    0,                          /* 0x26     */
328
    0,                          /* 0x27     */
329
    0,                          /* 0x28     */
330
    0,                          /* 0x29     */
331
    0,                          /* 0x2a     */
332
    0,                          /* 0x2b     */
333
    0,                          /* 0x2c     */
334
    0,                          /* 0x2d     */
335
    0,                          /* 0x2e     */
336
    0,                          /* 0x2f     */
337
    0,                          /* 0x30     */
338
    0,                          /* 0x31     */
339
    0,                          /* 0x32     */
340
    0,                          /* 0x33     */
341
    0,                          /* 0x34     */
342
    0,                          /* 0x35     */
343
    0,                          /* 0x36     */
344
    0,                          /* 0x37     */
345
    0,                          /* 0x38     */
346
    0,                          /* 0x39     */
347
    0,                          /* 0x3a     */
348
    0,                          /* 0x3b     */
349
    0,                          /* 0x3c     */
350
    0,                          /* 0x3d     */
351
    0,                          /* 0x3e     */
352
    0,                          /* 0x3f     */
353
    0,                          /* 0x40 ' ' */
354
    0,                          /* 0x41     */
355
    0,                          /* 0x42     */
356
    0,                          /* 0x43     */
357
    0,                          /* 0x44     */
358
    0,                          /* 0x45     */
359
    0,                          /* 0x46     */
360
    0,                          /* 0x47     */
361
    0,                          /* 0x48     */
362
    0,                          /* 0x49     */
363
    0,                          /* 0x4a '[' */
364
    T_SCHEME,                   /* 0x4b '.' */
365
    0,                          /* 0x4c '<' */
366
    0,                          /* 0x4d '(' */
367
    T_SCHEME,                   /* 0x4e '+' */
368
    0,                          /* 0x4f '!' */
369
    0,                          /* 0x50 '&' */
370
    0,                          /* 0x51     */
371
    0,                          /* 0x52     */
372
    0,                          /* 0x53     */
373
    0,                          /* 0x54     */
374
    0,                          /* 0x55     */
375
    0,                          /* 0x56     */
376
    0,                          /* 0x57     */
377
    0,                          /* 0x58     */
378
    0,                          /* 0x59     */
379
    0,                          /* 0x5a ']' */
380
    0,                          /* 0x5b '$' */
381
    0,                          /* 0x5c '*' */
382
    0,                          /* 0x5d ')' */
383
    0,                          /* 0x5e ';' */
384
    0,                          /* 0x5f '^' */
385
    T_SCHEME,                   /* 0x60 '-' */
386
    T_SLASH,                    /* 0x61 '/' */
387
    0,                          /* 0x62     */
388
    0,                          /* 0x63     */
389
    0,                          /* 0x64     */
390
    0,                          /* 0x65     */
391
    0,                          /* 0x66     */
392
    0,                          /* 0x67     */
393
    0,                          /* 0x68     */
394
    0,                          /* 0x69     */
395
    0,                          /* 0x6a '|' */
396
    0,                          /* 0x6b ',' */
397
    0,                          /* 0x6c '%' */
398
    0,                          /* 0x6d '_' */
399
    0,                          /* 0x6e '>' */
400
    T_QUESTION,                 /* 0x6f '?' */
401
    0,                          /* 0x70     */
402
    0,                          /* 0x71     */
403
    0,                          /* 0x72     */
404
    0,                          /* 0x73     */
405
    0,                          /* 0x74     */
406
    0,                          /* 0x75     */
407
    0,                          /* 0x76     */
408
    0,                          /* 0x77     */
409
    0,                          /* 0x78     */
410
    0,                          /* 0x79 '`' */
411
    0,                          /* 0x7a ':' */
412
    T_HASH,                     /* 0x7b '#' */
413
    0,                          /* 0x7c '@' */
414
    0,                          /* 0x7d ''' */
415
    0,                          /* 0x7e '=' */
416
    0,                          /* 0x7f '"' */
417
    0,                          /* 0x80     */
418
    T_ALPHA,                    /* 0x81 'a' */
419
    T_ALPHA,                    /* 0x82 'b' */
420
    T_ALPHA,                    /* 0x83 'c' */
421
    T_ALPHA,                    /* 0x84 'd' */
422
    T_ALPHA,                    /* 0x85 'e' */
423
    T_ALPHA,                    /* 0x86 'f' */
424
    T_ALPHA,                    /* 0x87 'g' */
425
    T_ALPHA,                    /* 0x88 'h' */
426
    T_ALPHA,                    /* 0x89 'i' */
427
    0,                          /* 0x8a     */
428
    0,                          /* 0x8b     */
429
    0,                          /* 0x8c     */
430
    0,                          /* 0x8d     */
431
    0,                          /* 0x8e     */
432
    0,                          /* 0x8f     */
433
    0,                          /* 0x90     */
434
    T_ALPHA,                    /* 0x91 'j' */
435
    T_ALPHA,                    /* 0x92 'k' */
436
    T_ALPHA,                    /* 0x93 'l' */
437
    T_ALPHA,                    /* 0x94 'm' */
438
    T_ALPHA,                    /* 0x95 'n' */
439
    T_ALPHA,                    /* 0x96 'o' */
440
    T_ALPHA,                    /* 0x97 'p' */
441
    T_ALPHA,                    /* 0x98 'q' */
442
    T_ALPHA,                    /* 0x99 'r' */
443
    0,                          /* 0x9a     */
444
    0,                          /* 0x9b     */
445
    0,                          /* 0x9c     */
446
    0,                          /* 0x9d     */
447
    0,                          /* 0x9e     */
448
    0,                          /* 0x9f     */
449
    0,                          /* 0xa0     */
450
    0,                          /* 0xa1 '~' */
451
    T_ALPHA,                    /* 0xa2 's' */
452
    T_ALPHA,                    /* 0xa3 't' */
453
    T_ALPHA,                    /* 0xa4 'u' */
454
    T_ALPHA,                    /* 0xa5 'v' */
455
    T_ALPHA,                    /* 0xa6 'w' */
456
    T_ALPHA,                    /* 0xa7 'x' */
457
    T_ALPHA,                    /* 0xa8 'y' */
458
    T_ALPHA,                    /* 0xa9 'z' */
459
    0,                          /* 0xaa     */
460
    0,                          /* 0xab     */
461
    0,                          /* 0xac     */
462
    0,                          /* 0xad     */
463
    0,                          /* 0xae     */
464
    0,                          /* 0xaf     */
465
    0,                          /* 0xb0     */
466
    0,                          /* 0xb1     */
467
    0,                          /* 0xb2     */
468
    0,                          /* 0xb3     */
469
    0,                          /* 0xb4     */
470
    0,                          /* 0xb5     */
471
    0,                          /* 0xb6     */
472
    0,                          /* 0xb7     */
473
    0,                          /* 0xb8     */
474
    0,                          /* 0xb9     */
475
    0,                          /* 0xba     */
476
    0,                          /* 0xbb     */
477
    0,                          /* 0xbc     */
478
    0,                          /* 0xbd     */
479
    0,                          /* 0xbe     */
480
    0,                          /* 0xbf     */
481
    0,                          /* 0xc0 '{' */
482
    T_ALPHA,                    /* 0xc1 'A' */
483
    T_ALPHA,                    /* 0xc2 'B' */
484
    T_ALPHA,                    /* 0xc3 'C' */
485
    T_ALPHA,                    /* 0xc4 'D' */
486
    T_ALPHA,                    /* 0xc5 'E' */
487
    T_ALPHA,                    /* 0xc6 'F' */
488
    T_ALPHA,                    /* 0xc7 'G' */
489
    T_ALPHA,                    /* 0xc8 'H' */
490
    T_ALPHA,                    /* 0xc9 'I' */
491
    0,                          /* 0xca     */
492
    0,                          /* 0xcb     */
493
    0,                          /* 0xcc     */
494
    0,                          /* 0xcd     */
495
    0,                          /* 0xce     */
496
    0,                          /* 0xcf     */
497
    0,                          /* 0xd0 '}' */
498
    T_ALPHA,                    /* 0xd1 'J' */
499
    T_ALPHA,                    /* 0xd2 'K' */
500
    T_ALPHA,                    /* 0xd3 'L' */
501
    T_ALPHA,                    /* 0xd4 'M' */
502
    T_ALPHA,                    /* 0xd5 'N' */
503
    T_ALPHA,                    /* 0xd6 'O' */
504
    T_ALPHA,                    /* 0xd7 'P' */
505
    T_ALPHA,                    /* 0xd8 'Q' */
506
    T_ALPHA,                    /* 0xd9 'R' */
507
    0,                          /* 0xda     */
508
    0,                          /* 0xdb     */
509
    0,                          /* 0xdc     */
510
    0,                          /* 0xdd     */
511
    0,                          /* 0xde     */
512
    0,                          /* 0xdf     */
513
    0,                          /* 0xe0 '\' */
514
    0,                          /* 0xe1     */
515
    T_ALPHA,                    /* 0xe2 'S' */
516
    T_ALPHA,                    /* 0xe3 'T' */
517
    T_ALPHA,                    /* 0xe4 'U' */
518
    T_ALPHA,                    /* 0xe5 'V' */
519
    T_ALPHA,                    /* 0xe6 'W' */
520
    T_ALPHA,                    /* 0xe7 'X' */
521
    T_ALPHA,                    /* 0xe8 'Y' */
522
    T_ALPHA,                    /* 0xe9 'Z' */
523
    0,                          /* 0xea     */
524
    0,                          /* 0xeb     */
525
    0,                          /* 0xec     */
526
    0,                          /* 0xed     */
527
    0,                          /* 0xee     */
528
    0,                          /* 0xef     */
529
    T_SCHEME,                   /* 0xf0 '0' */
530
    T_SCHEME,                   /* 0xf1 '1' */
531
    T_SCHEME,                   /* 0xf2 '2' */
532
    T_SCHEME,                   /* 0xf3 '3' */
533
    T_SCHEME,                   /* 0xf4 '4' */
534
    T_SCHEME,                   /* 0xf5 '5' */
535
    T_SCHEME,                   /* 0xf6 '6' */
536
    T_SCHEME,                   /* 0xf7 '7' */
537
    T_SCHEME,                   /* 0xf8 '8' */
538
    T_SCHEME,                   /* 0xf9 '9' */
539
    0,                          /* 0xfa     */
540
    0,                          /* 0xfb     */
541
    0,                          /* 0xfc     */
542
    0,                          /* 0xfd     */
543
    0,                          /* 0xfe     */
544
    0                           /* 0xff     */
545
};
546
#else
547
/* Delimiter table for the ASCII character set */
548
static const unsigned char uri_delims[256] = {
549
    T_NUL,                      /* 0x00     */
550
    0,                          /* 0x01     */
551
    0,                          /* 0x02     */
552
    0,                          /* 0x03     */
553
    0,                          /* 0x04     */
554
    0,                          /* 0x05     */
555
    0,                          /* 0x06     */
556
    0,                          /* 0x07     */
557
    0,                          /* 0x08     */
558
    0,                          /* 0x09     */
559
    0,                          /* 0x0a     */
560
    0,                          /* 0x0b     */
561
    0,                          /* 0x0c     */
562
    0,                          /* 0x0d     */
563
    0,                          /* 0x0e     */
564
    0,                          /* 0x0f     */
565
    0,                          /* 0x10     */
566
    0,                          /* 0x11     */
567
    0,                          /* 0x12     */
568
    0,                          /* 0x13     */
569
    0,                          /* 0x14     */
570
    0,                          /* 0x15     */
571
    0,                          /* 0x16     */
572
    0,                          /* 0x17     */
573
    0,                          /* 0x18     */
574
    0,                          /* 0x19     */
575
    0,                          /* 0x1a     */
576
    0,                          /* 0x1b     */
577
    0,                          /* 0x1c     */
578
    0,                          /* 0x1d     */
579
    0,                          /* 0x1e     */
580
    0,                          /* 0x1f     */
581
    0,                          /* 0x20 ' ' */
582
    0,                          /* 0x21 '!' */
583
    0,                          /* 0x22 '"' */
584
    T_HASH,                     /* 0x23 '#' */
585
    0,                          /* 0x24 '$' */
586
    0,                          /* 0x25 '%' */
587
    0,                          /* 0x26 '&' */
588
    0,                          /* 0x27 ''' */
589
    0,                          /* 0x28 '(' */
590
    0,                          /* 0x29 ')' */
591
    0,                          /* 0x2a '*' */
592
    T_SCHEME,                   /* 0x2b '+' */
593
    0,                          /* 0x2c ',' */
594
    T_SCHEME,                   /* 0x2d '-' */
595
    T_SCHEME,                   /* 0x2e '.' */
596
    T_SLASH,                    /* 0x2f '/' */
597
    T_SCHEME,                   /* 0x30 '0' */
598
    T_SCHEME,                   /* 0x31 '1' */
599
    T_SCHEME,                   /* 0x32 '2' */
600
    T_SCHEME,                   /* 0x33 '3' */
601
    T_SCHEME,                   /* 0x34 '4' */
602
    T_SCHEME,                   /* 0x35 '5' */
603
    T_SCHEME,                   /* 0x36 '6' */
604
    T_SCHEME,                   /* 0x37 '7' */
605
    T_SCHEME,                   /* 0x38 '8' */
606
    T_SCHEME,                   /* 0x39 '9' */
607
    0,                          /* 0x3a ':' */
608
    0,                          /* 0x3b ';' */
609
    0,                          /* 0x3c '<' */
610
    0,                          /* 0x3d '=' */
611
    0,                          /* 0x3e '>' */
612
    T_QUESTION,                 /* 0x3f '?' */
613
    0,                          /* 0x40 '@' */
614
    T_ALPHA,                    /* 0x41 'A' */
615
    T_ALPHA,                    /* 0x42 'B' */
616
    T_ALPHA,                    /* 0x43 'C' */
617
    T_ALPHA,                    /* 0x44 'D' */
618
    T_ALPHA,                    /* 0x45 'E' */
619
    T_ALPHA,                    /* 0x46 'F' */
620
    T_ALPHA,                    /* 0x47 'G' */
621
    T_ALPHA,                    /* 0x48 'H' */
622
    T_ALPHA,                    /* 0x49 'I' */
623
    T_ALPHA,                    /* 0x4a 'J' */
624
    T_ALPHA,                    /* 0x4b 'K' */
625
    T_ALPHA,                    /* 0x4c 'L' */
626
    T_ALPHA,                    /* 0x4d 'M' */
627
    T_ALPHA,                    /* 0x4e 'N' */
628
    T_ALPHA,                    /* 0x4f 'O' */
629
    T_ALPHA,                    /* 0x50 'P' */
630
    T_ALPHA,                    /* 0x51 'Q' */
631
    T_ALPHA,                    /* 0x52 'R' */
632
    T_ALPHA,                    /* 0x53 'S' */
633
    T_ALPHA,                    /* 0x54 'T' */
634
    T_ALPHA,                    /* 0x55 'U' */
635
    T_ALPHA,                    /* 0x56 'V' */
636
    T_ALPHA,                    /* 0x57 'W' */
637
    T_ALPHA,                    /* 0x58 'X' */
638
    T_ALPHA,                    /* 0x59 'Y' */
639
    T_ALPHA,                    /* 0x5a 'Z' */
640
    0,                          /* 0x5b '[' */
641
    0,                          /* 0x5c '\' */
642
    0,                          /* 0x5d ']' */
643
    0,                          /* 0x5e '^' */
644
    0,                          /* 0x5f '_' */
645
    0,                          /* 0x60 '`' */
646
    T_ALPHA,                    /* 0x61 'a' */
647
    T_ALPHA,                    /* 0x62 'b' */
648
    T_ALPHA,                    /* 0x63 'c' */
649
    T_ALPHA,                    /* 0x64 'd' */
650
    T_ALPHA,                    /* 0x65 'e' */
651
    T_ALPHA,                    /* 0x66 'f' */
652
    T_ALPHA,                    /* 0x67 'g' */
653
    T_ALPHA,                    /* 0x68 'h' */
654
    T_ALPHA,                    /* 0x69 'i' */
655
    T_ALPHA,                    /* 0x6a 'j' */
656
    T_ALPHA,                    /* 0x6b 'k' */
657
    T_ALPHA,                    /* 0x6c 'l' */
658
    T_ALPHA,                    /* 0x6d 'm' */
659
    T_ALPHA,                    /* 0x6e 'n' */
660
    T_ALPHA,                    /* 0x6f 'o' */
661
    T_ALPHA,                    /* 0x70 'p' */
662
    T_ALPHA,                    /* 0x71 'q' */
663
    T_ALPHA,                    /* 0x72 'r' */
664
    T_ALPHA,                    /* 0x73 's' */
665
    T_ALPHA,                    /* 0x74 't' */
666
    T_ALPHA,                    /* 0x75 'u' */
667
    T_ALPHA,                    /* 0x76 'v' */
668
    T_ALPHA,                    /* 0x77 'w' */
669
    T_ALPHA,                    /* 0x78 'x' */
670
    T_ALPHA,                    /* 0x79 'y' */
671
    T_ALPHA,                    /* 0x7a 'z' */
672
    0,                          /* 0x7b '{' */
673
    0,                          /* 0x7c '|' */
674
    0,                          /* 0x7d '}' */
675
    0,                          /* 0x7e '~' */
676
    0,                          /* 0x7f     */
677
    0,                          /* 0x80     */
678
    0,                          /* 0x81     */
679
    0,                          /* 0x82     */
680
    0,                          /* 0x83     */
681
    0,                          /* 0x84     */
682
    0,                          /* 0x85     */
683
    0,                          /* 0x86     */
684
    0,                          /* 0x87     */
685
    0,                          /* 0x88     */
686
    0,                          /* 0x89     */
687
    0,                          /* 0x8a     */
688
    0,                          /* 0x8b     */
689
    0,                          /* 0x8c     */
690
    0,                          /* 0x8d     */
691
    0,                          /* 0x8e     */
692
    0,                          /* 0x8f     */
693
    0,                          /* 0x90     */
694
    0,                          /* 0x91     */
695
    0,                          /* 0x92     */
696
    0,                          /* 0x93     */
697
    0,                          /* 0x94     */
698
    0,                          /* 0x95     */
699
    0,                          /* 0x96     */
700
    0,                          /* 0x97     */
701
    0,                          /* 0x98     */
702
    0,                          /* 0x99     */
703
    0,                          /* 0x9a     */
704
    0,                          /* 0x9b     */
705
    0,                          /* 0x9c     */
706
    0,                          /* 0x9d     */
707
    0,                          /* 0x9e     */
708
    0,                          /* 0x9f     */
709
    0,                          /* 0xa0     */
710
    0,                          /* 0xa1     */
711
    0,                          /* 0xa2     */
712
    0,                          /* 0xa3     */
713
    0,                          /* 0xa4     */
714
    0,                          /* 0xa5     */
715
    0,                          /* 0xa6     */
716
    0,                          /* 0xa7     */
717
    0,                          /* 0xa8     */
718
    0,                          /* 0xa9     */
719
    0,                          /* 0xaa     */
720
    0,                          /* 0xab     */
721
    0,                          /* 0xac     */
722
    0,                          /* 0xad     */
723
    0,                          /* 0xae     */
724
    0,                          /* 0xaf     */
725
    0,                          /* 0xb0     */
726
    0,                          /* 0xb1     */
727
    0,                          /* 0xb2     */
728
    0,                          /* 0xb3     */
729
    0,                          /* 0xb4     */
730
    0,                          /* 0xb5     */
731
    0,                          /* 0xb6     */
732
    0,                          /* 0xb7     */
733
    0,                          /* 0xb8     */
734
    0,                          /* 0xb9     */
735
    0,                          /* 0xba     */
736
    0,                          /* 0xbb     */
737
    0,                          /* 0xbc     */
738
    0,                          /* 0xbd     */
739
    0,                          /* 0xbe     */
740
    0,                          /* 0xbf     */
741
    0,                          /* 0xc0     */
742
    0,                          /* 0xc1     */
743
    0,                          /* 0xc2     */
744
    0,                          /* 0xc3     */
745
    0,                          /* 0xc4     */
746
    0,                          /* 0xc5     */
747
    0,                          /* 0xc6     */
748
    0,                          /* 0xc7     */
749
    0,                          /* 0xc8     */
750
    0,                          /* 0xc9     */
751
    0,                          /* 0xca     */
752
    0,                          /* 0xcb     */
753
    0,                          /* 0xcc     */
754
    0,                          /* 0xcd     */
755
    0,                          /* 0xce     */
756
    0,                          /* 0xcf     */
757
    0,                          /* 0xd0     */
758
    0,                          /* 0xd1     */
759
    0,                          /* 0xd2     */
760
    0,                          /* 0xd3     */
761
    0,                          /* 0xd4     */
762
    0,                          /* 0xd5     */
763
    0,                          /* 0xd6     */
764
    0,                          /* 0xd7     */
765
    0,                          /* 0xd8     */
766
    0,                          /* 0xd9     */
767
    0,                          /* 0xda     */
768
    0,                          /* 0xdb     */
769
    0,                          /* 0xdc     */
770
    0,                          /* 0xdd     */
771
    0,                          /* 0xde     */
772
    0,                          /* 0xdf     */
773
    0,                          /* 0xe0     */
774
    0,                          /* 0xe1     */
775
    0,                          /* 0xe2     */
776
    0,                          /* 0xe3     */
777
    0,                          /* 0xe4     */
778
    0,                          /* 0xe5     */
779
    0,                          /* 0xe6     */
780
    0,                          /* 0xe7     */
781
    0,                          /* 0xe8     */
782
    0,                          /* 0xe9     */
783
    0,                          /* 0xea     */
784
    0,                          /* 0xeb     */
785
    0,                          /* 0xec     */
786
    0,                          /* 0xed     */
787
    0,                          /* 0xee     */
788
    0,                          /* 0xef     */
789
    0,                          /* 0xf0     */
790
    0,                          /* 0xf1     */
791
    0,                          /* 0xf2     */
792
    0,                          /* 0xf3     */
793
    0,                          /* 0xf4     */
794
    0,                          /* 0xf5     */
795
    0,                          /* 0xf6     */
796
    0,                          /* 0xf7     */
797
    0,                          /* 0xf8     */
798
    0,                          /* 0xf9     */
799
    0,                          /* 0xfa     */
800
    0,                          /* 0xfb     */
801
    0,                          /* 0xfc     */
802
    0,                          /* 0xfd     */
803
    0,                          /* 0xfe     */
804
    0                           /* 0xff     */
805
};
806
#endif
807
808
809
/* it works like this:
    if ((uri_delims[ch] & NOTEND_foobar) == 0) {
        then we're not at a delimiter for foobar
    }
*/
814
815
9.97k
/* Mask of uri_delims[] flag bits that end the hostinfo scan: apr_uri_parse()
 * keeps advancing while the current byte's table entry has none of these
 * bits set.
 */
#define NOTEND_HOSTINFO   (T_SLASH | T_QUESTION | T_HASH | T_NUL)
816
6.10k
/* Mask of uri_delims[] flag bits that end the path scan; same convention as
 * NOTEND_HOSTINFO, but T_SLASH is not included, so the scan does not stop on
 * bytes flagged only with T_SLASH.
 */
#define NOTEND_PATH       (T_QUESTION | T_HASH | T_NUL)
817
818
/* parse_uri_components():
819
 * Parse a given URI, fill in all supplied fields of a uri_components
820
 * structure. This eliminates the necessity of extracting host, port,
821
 * path, query info repeatedly in the modules.
822
 * Side effects:
823
 *  - fills in fields of uri_components *uptr
824
 *  - none on any of the r->* fields
825
 */
826
APR_DECLARE(apr_status_t) apr_uri_parse(apr_pool_t *p, const char *uri,
827
                                        apr_uri_t *uptr)
828
611
{
829
611
    const char *s;
830
611
    const char *s1;
831
611
    const char *hostinfo;
832
611
    char *endstr;
833
611
    int port;
834
611
    int v6_offset1 = 0, v6_offset2 = 0;
835
611
    int have_zone_id = 0;
836
837
    /* Initialize the structure. parse_uri() and parse_uri_components()
838
     * can be called more than once per request.
839
     */
840
611
    memset (uptr, '\0', sizeof(*uptr));
841
611
    uptr->is_initialized = 1;
842
843
    /* We assume the processor has a branch predictor like most --
844
     * it assumes forward branches are untaken and backwards are taken.  That's
845
     * the reason for the gotos.  -djg
846
     */
847
611
    if (uri[0] == '/') {
848
        /* RFC2396 #4.3 says that two leading slashes mean we have an
849
         * authority component, not a path!  Fixing this looks scary
850
         * with the gotos here.  But if the existing logic is valid,
851
         * then presumably a goto pointing to deal_with_authority works.
852
         *
853
         * RFC2396 describes this as resolving an ambiguity.  In the
854
         * case of three or more slashes there would seem to be no
855
         * ambiguity, so it is a path after all.
856
         */
857
83
        if (uri[1] == '/' && uri[2] != '/') {
858
44
            s = uri + 2 ;
859
44
            goto deal_with_authority ;
860
44
        }
861
862
514
deal_with_path:
863
        /* we expect uri to point to first character of path ... remember
864
         * that the path could be empty -- http://foobar?query for example
865
         */
866
514
        s = uri;
867
6.10k
        while ((uri_delims[*(unsigned char *)s] & NOTEND_PATH) == 0) {
868
5.59k
            ++s;
869
5.59k
        }
870
514
        if (s != uri) {
871
329
            uptr->path = apr_pstrmemdup(p, uri, s - uri);
872
329
        }
873
514
        if (*s == 0) {
874
440
            return APR_SUCCESS;
875
440
        }
876
74
        if (*s == '?') {
877
48
            ++s;
878
48
            s1 = strchr(s, '#');
879
48
            if (s1) {
880
12
                uptr->fragment = apr_pstrdup(p, s1 + 1);
881
12
                uptr->query = apr_pstrmemdup(p, s, s1 - s);
882
12
            }
883
36
            else {
884
36
                uptr->query = apr_pstrdup(p, s);
885
36
            }
886
48
            return APR_SUCCESS;
887
48
        }
888
        /* otherwise it's a fragment */
889
26
        uptr->fragment = apr_pstrdup(p, s + 1);
890
26
        return APR_SUCCESS;
891
74
    }
892
893
    /* find the scheme: */
894
528
    s = uri;
895
    /* first char must be letter */
896
528
    if (uri_delims[*(unsigned char *)s] & T_ALPHA) {
897
455
        ++s;
898
5.12k
        while ((uri_delims[*(unsigned char *)s] & (T_ALPHA|T_SCHEME)))
899
4.67k
            ++s;
900
455
    }
901
    /* scheme must be non-empty and followed by : */
902
528
    if (s != uri && s[0] == ':') {
903
399
        uptr->scheme = apr_pstrmemdup(p, uri, s - uri);
904
399
        s++;
905
399
    }
906
129
    else {
907
        /* No valid scheme, restart from the beginning */
908
129
        s = uri;
909
129
    }
910
911
528
    if (s[0] != '/' || s[1] != '/') {
912
172
        if (uri == s) {
913
            /*
914
             * RFC 3986 3.3: If we have no scheme and no authority,
915
             * the leading segment of a relative path must not contain a ':'.
916
             */
917
129
            char *first_slash = strchr(uri, '/');
918
129
            if (first_slash) {
919
1.00k
                while (s < first_slash) {
920
976
                    if (s[0] == ':')
921
40
                        return APR_EGENERAL;
922
936
                    ++s;
923
936
                }
924
                /* no scheme but relative path, e.g. '../image.jpg' */
925
69
            }
926
60
            else {
927
60
                if (strchr(uri, ':') != NULL)
928
18
                    return APR_EGENERAL;
929
                /* no scheme, no slash, but relative path, e.g. 'image.jpg' */
930
60
            }
931
71
            goto deal_with_path;
932
129
        }
933
        /* scheme and relative path */
934
43
        uri = s;
935
43
        goto deal_with_path;
936
172
    }
937
938
356
    s += 2;
939
940
400
deal_with_authority:
941
400
    hostinfo = s;
942
9.97k
    while ((uri_delims[*(unsigned char *)s] & NOTEND_HOSTINFO) == 0) {
943
9.57k
        ++s;
944
9.57k
    }
945
400
    uri = s;        /* whatever follows hostinfo is start of uri */
946
400
    uptr->hostinfo = apr_pstrmemdup(p, hostinfo, uri - hostinfo);
947
948
    /* If there's a username:password@host:port, the @ we want is the last @...
949
     * too bad there's no memrchr()... For the C purists, note that hostinfo
950
     * is definitely not the first character of the original uri so therefore
951
     * &hostinfo[-1] < &hostinfo[0] ... and this loop is valid C.
952
     */
953
8.79k
    do {
954
8.79k
        --s;
955
8.79k
    } while (s >= hostinfo && *s != '@');
956
400
    if (s < hostinfo) {
957
        /* again we want the common case to be fall through */
958
400
deal_with_host:
959
        /* We expect hostinfo to point to the first character of
960
         * the hostname.  If there's a port it is the first colon,
961
         * except with IPv6.
962
         *
963
         * IPv6 also has the interesting property (RFC6874) that it may contain
964
         * a percent-encoded percent delimiting the zone identifier. We need to
965
         * unescape that.
966
         */
967
400
        if (*hostinfo == '[') {
968
145
            apr_status_t err;
969
970
145
            v6_offset1 = 1;
971
145
            v6_offset2 = 2;
972
145
            s = memchr(hostinfo, ']', uri - hostinfo);
973
145
            if (s == NULL) {
974
5
                return APR_EGENERAL;
975
5
            }
976
977
            /* zone identifier */
978
140
            err = detect_scope_zone_id(&have_zone_id, hostinfo + 1, s - hostinfo - 1);
979
            /* FIXME: Ignore APR_EINVAL (invalid escaped character) for now as
980
             * old code may rely on it silently getting ignored?
981
             */
982
140
            if ((err != APR_SUCCESS) && (err != APR_EINVAL)) {
983
0
                return err;
984
0
            }
985
986
            /* Port */
987
140
            if (*++s != ':') {
988
80
                s = NULL; /* no port */
989
80
            }
990
140
        }
991
255
        else {
992
255
            s = memchr(hostinfo, ':', uri - hostinfo);
993
255
        }
994
395
        if (s == NULL) {
995
            /* we expect the common case to have no port */
996
99
            uptr->hostname = apr_pstrmemdup(p,
997
99
                                            hostinfo + v6_offset1,
998
99
                                            uri - hostinfo - v6_offset2);
999
99
            if (have_zone_id) {
1000
2
                percent_decode_scope_zone_id(uptr->hostname);
1001
2
            }
1002
99
            goto deal_with_path;
1003
99
        }
1004
296
        uptr->hostname = apr_pstrmemdup(p,
1005
296
                                        hostinfo + v6_offset1,
1006
296
                                        s - hostinfo - v6_offset2);
1007
296
        if (have_zone_id) {
1008
5
            percent_decode_scope_zone_id(uptr->hostname);
1009
5
        }
1010
296
        ++s;
1011
296
        uptr->port_str = apr_pstrmemdup(p, s, uri - s);
1012
296
        if (uri != s) {
1013
127
            port = strtol(uptr->port_str, &endstr, 10);
1014
127
            uptr->port = port;
1015
127
            if (*endstr == '\0') {
1016
93
                goto deal_with_path;
1017
93
            }
1018
            /* Invalid characters after ':' found */
1019
34
            return APR_EGENERAL;
1020
127
        }
1021
169
        uptr->port = apr_uri_port_of_scheme(uptr->scheme);
1022
169
        goto deal_with_path;
1023
296
    }
1024
1025
    /* first colon delimits username:password */
1026
104
    s1 = memchr(hostinfo, ':', s - hostinfo);
1027
104
    if (s1) {
1028
46
        uptr->user = apr_pstrmemdup(p, hostinfo, s1 - hostinfo);
1029
46
        ++s1;
1030
46
        uptr->password = apr_pstrmemdup(p, s1, s - s1);
1031
46
    }
1032
58
    else {
1033
58
        uptr->user = apr_pstrmemdup(p, hostinfo, s - hostinfo);
1034
58
    }
1035
104
    hostinfo = s + 1;
1036
104
    goto deal_with_host;
1037
400
}
1038
1039
/* Special case for CONNECT parsing: it comes with the hostinfo part only */
1040
/* See the INTERNET-DRAFT document "Tunneling SSL Through a WWW Proxy"
1041
 * currently at http://www.mcom.com/newsref/std/tunneling_ssl.html
1042
 * for the format of the "CONNECT host:port HTTP/1.0" request
1043
 */
1044
APR_DECLARE(apr_status_t) apr_uri_parse_hostinfo(apr_pool_t *p,
1045
                                                 const char *hostinfo,
1046
                                                 apr_uri_t *uptr)
1047
326
{
1048
326
    const char *s;
1049
326
    char *endstr;
1050
326
    const char *rsb;
1051
326
    int v6_offset1 = 0;
1052
1053
    /* Initialize the structure. parse_uri() and parse_uri_components()
1054
     * can be called more than once per request.
1055
     */
1056
326
    memset(uptr, '\0', sizeof(*uptr));
1057
326
    uptr->is_initialized = 1;
1058
326
    uptr->hostinfo = apr_pstrdup(p, hostinfo);
1059
1060
    /* We expect hostinfo to point to the first character of
1061
     * the hostname.  There must be a port, separated by a colon
1062
     */
1063
326
    if (*hostinfo == '[') {
1064
49
        if ((rsb = strchr(hostinfo, ']')) == NULL ||
1065
44
            *(rsb + 1) != ':') {
1066
30
            return APR_EGENERAL;
1067
30
        }
1068
        /* literal IPv6 address */
1069
19
        s = rsb + 1;
1070
19
        ++hostinfo;
1071
19
        v6_offset1 = 1;
1072
19
    }
1073
277
    else {
1074
277
        s = strchr(hostinfo, ':');
1075
277
    }
1076
296
    if (s == NULL) {
1077
18
        return APR_EGENERAL;
1078
18
    }
1079
278
    uptr->hostname = apr_pstrndup(p, hostinfo, s - hostinfo - v6_offset1);
1080
1081
    /* Again, ensure zone IDs are decoded. */
1082
278
    int have_zone_id = 0;
1083
278
    apr_status_t err = detect_scope_zone_id(&have_zone_id, uptr->hostname,
1084
278
                                            strlen(uptr->hostname));
1085
    /* FIXME: Ignore APR_EINVAL (invalid escaped character) for now as old code
1086
     * may rely on it silently getting ignored?
1087
     */
1088
278
    if ((err != APR_SUCCESS) && (err != APR_EINVAL)) {
1089
0
        return err;
1090
0
    }
1091
278
    if (have_zone_id) {
1092
2
        percent_decode_scope_zone_id(uptr->hostname);
1093
2
    }
1094
1095
278
    ++s;
1096
278
    uptr->port_str = apr_pstrdup(p, s);
1097
278
    if (*s != '\0') {
1098
270
        uptr->port = (unsigned short) strtol(uptr->port_str, &endstr, 10);
1099
270
        if (*endstr == '\0') {
1100
8
            return APR_SUCCESS;
1101
8
        }
1102
        /* Invalid characters after ':' found */
1103
270
    }
1104
270
    return APR_EGENERAL;
1105
278
}