Coverage Report

Created: 2024-05-18 02:13

/src/libxml2/uri.c
Line
Count
Source (jump to first uncovered line)
1
/**
2
 * uri.c: set of generic URI related routines
3
 *
4
 * Reference: RFCs 3986, 2732 and 2373
5
 *
6
 * See Copyright for the status of this software.
7
 *
8
 * daniel@veillard.com
9
 */
10
11
#define IN_LIBXML
12
#include "libxml.h"
13
14
#include <limits.h>
15
#include <string.h>
16
17
#include <libxml/xmlmemory.h>
18
#include <libxml/uri.h>
19
#include <libxml/globals.h>
20
#include <libxml/xmlerror.h>
21
22
#include "private/error.h"
23
24
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB
 * and 4 KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so the macro behaves as a single
 * operand in any arithmetic expression.
 */
#define MAX_URI_LENGTH (1024 * 1024)
37
38
static void
39
xmlURIErrMemory(const char *extra)
40
0
{
41
0
    if (extra)
42
0
        __xmlRaiseError(NULL, NULL, NULL,
43
0
                        NULL, NULL, XML_FROM_URI,
44
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
45
0
                        extra, NULL, NULL, 0, 0,
46
0
                        "Memory allocation failed : %s\n", extra);
47
0
    else
48
0
        __xmlRaiseError(NULL, NULL, NULL,
49
0
                        NULL, NULL, XML_FROM_URI,
50
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
51
0
                        NULL, NULL, NULL, 0, 0,
52
0
                        "Memory allocation failed\n");
53
0
}
54
55
static void xmlCleanURI(xmlURIPtr uri);
56
57
/*
58
 * Old rule from 2396 used in legacy handling code
59
 * alpha    = lowalpha | upalpha
60
 */
61
1.63G
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
62
63
64
/*
65
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
66
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
67
 *            "u" | "v" | "w" | "x" | "y" | "z"
68
 */
69
70
1.63G
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
71
72
/*
73
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
74
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
75
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
76
 */
77
459M
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
78
79
#ifdef IS_DIGIT
80
#undef IS_DIGIT
81
#endif
82
/*
83
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
84
 */
85
425M
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
86
87
/*
88
 * alphanum = alpha | digit
89
 */
90
91
1.63G
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
92
93
/*
94
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
95
 */
96
97
304M
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
98
304M
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
99
304M
    ((x) == '(') || ((x) == ')'))
100
101
/*
102
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
103
 */
104
105
#define IS_UNWISE(p)                                                    \
106
200k
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
107
200k
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
108
200k
       ((*(p) == ']')) || ((*(p) == '`')))
109
/*
110
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
111
 *            "[" | "]"
112
 */
113
114
1.15M
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
115
1.15M
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
116
1.15M
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
117
1.15M
        ((x) == ']'))
118
119
/*
120
 * unreserved = alphanum | mark
121
 */
122
123
817M
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
124
125
/*
 * Advance p to the next character: skip three bytes when p points at
 * '%' (a percent-escape), one byte otherwise. The caller is expected to
 * have validated the two bytes following a '%'.
 *
 * The argument is parenthesized so that expressions such as NEXT(*pp)
 * update the intended pointer.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)
130
131
/*
132
 * Productions from the spec.
133
 *
134
 *    authority     = server | reg_name
135
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
136
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
137
 *
138
 * path          = [ abs_path | opaque_part ]
139
 */
140
141
8.05M
/*
 * Copy the first n bytes of s with xmlStrndup and cast the result back
 * to char *. The whole expansion is parenthesized so it can be used as
 * an operand of postfix or binary operators.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
142
143
/************************************************************************
144
 *                  *
145
 *                         RFC 3986 parser        *
146
 *                  *
147
 ************************************************************************/
148
149
1.39G
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
150
1.01G
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||   \
151
1.01G
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
152
#define ISA_HEXDIG(p)             \
153
440M
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||   \
154
440M
        ((*(p) >= 'A') && (*(p) <= 'F')))
155
156
/*
157
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
158
 *                     / "*" / "+" / "," / ";" / "="
159
 */
160
#define ISA_SUB_DELIM(p)            \
161
1.01G
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||   \
162
77.1M
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||   \
163
77.1M
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||   \
164
77.1M
       ((*(p) == '=')) || ((*(p) == '\'')))
165
166
/*
167
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
168
 */
169
#define ISA_GEN_DELIM(p)            \
170
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
171
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
172
       ((*(p) == '@')))
173
174
/*
175
 *    reserved      = gen-delims / sub-delims
176
 */
177
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
178
179
/*
180
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
181
 */
182
#define ISA_UNRESERVED(p)           \
183
1.88G
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||   \
184
944M
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
185
186
/*
187
 *    pct-encoded   = "%" HEXDIG HEXDIG
188
 */
189
#define ISA_PCT_ENCODED(p)            \
190
1.24G
     ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
191
192
/*
193
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
194
 */
195
#define ISA_PCHAR(p)              \
196
1.74G
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||  \
197
918M
      ((*(p) == ':')) || ((*(p) == '@')))
198
199
/**
200
 * xmlParse3986Scheme:
201
 * @uri:  pointer to an URI structure
202
 * @str:  pointer to the string to analyze
203
 *
204
 * Parse an URI scheme
205
 *
206
 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
207
 *
208
 * Returns 0 or the error code
209
 */
210
static int
211
10.0M
xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
212
10.0M
    const char *cur;
213
214
10.0M
    if (str == NULL)
215
0
  return(-1);
216
217
10.0M
    cur = *str;
218
10.0M
    if (!ISA_ALPHA(cur))
219
2.12M
  return(2);
220
7.97M
    cur++;
221
31.1M
    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
222
31.1M
           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
223
7.97M
    if (uri != NULL) {
224
7.97M
  if (uri->scheme != NULL) xmlFree(uri->scheme);
225
7.97M
  uri->scheme = STRNDUP(*str, cur - *str);
226
7.97M
    }
227
7.97M
    *str = cur;
228
7.97M
    return(0);
229
10.0M
}
230
231
/**
232
 * xmlParse3986Fragment:
233
 * @uri:  pointer to an URI structure
234
 * @str:  pointer to the string to analyze
235
 *
236
 * Parse the query part of an URI
237
 *
238
 * fragment      = *( pchar / "/" / "?" )
239
 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
240
 *       in the fragment identifier but this is used very broadly for
241
 *       xpointer scheme selection, so we are allowing it here to not break
242
 *       for example all the DocBook processing chains.
243
 *
244
 * Returns 0 or the error code
245
 */
246
static int
247
xmlParse3986Fragment(xmlURIPtr uri, const char **str)
248
658k
{
249
658k
    const char *cur;
250
251
658k
    if (str == NULL)
252
0
        return (-1);
253
254
658k
    cur = *str;
255
256
28.0M
    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
257
28.0M
           (*cur == '[') || (*cur == ']') ||
258
28.0M
           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
259
27.3M
        NEXT(cur);
260
658k
    if (uri != NULL) {
261
658k
        if (uri->fragment != NULL)
262
0
            xmlFree(uri->fragment);
263
658k
  if (uri->cleanup & 2)
264
0
      uri->fragment = STRNDUP(*str, cur - *str);
265
658k
  else
266
658k
      uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
267
658k
    }
268
658k
    *str = cur;
269
658k
    return (0);
270
658k
}
271
272
/**
273
 * xmlParse3986Query:
274
 * @uri:  pointer to an URI structure
275
 * @str:  pointer to the string to analyze
276
 *
277
 * Parse the query part of an URI
278
 *
279
 * query = *uric
280
 *
281
 * Returns 0 or the error code
282
 */
283
static int
284
xmlParse3986Query(xmlURIPtr uri, const char **str)
285
84.1k
{
286
84.1k
    const char *cur;
287
288
84.1k
    if (str == NULL)
289
0
        return (-1);
290
291
84.1k
    cur = *str;
292
293
21.9M
    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
294
21.9M
           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
295
21.8M
        NEXT(cur);
296
84.1k
    if (uri != NULL) {
297
84.1k
        if (uri->query != NULL)
298
0
            xmlFree(uri->query);
299
84.1k
  if (uri->cleanup & 2)
300
0
      uri->query = STRNDUP(*str, cur - *str);
301
84.1k
  else
302
84.1k
      uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
303
304
  /* Save the raw bytes of the query as well.
305
   * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
306
   */
307
84.1k
  if (uri->query_raw != NULL)
308
0
      xmlFree (uri->query_raw);
309
84.1k
  uri->query_raw = STRNDUP (*str, cur - *str);
310
84.1k
    }
311
84.1k
    *str = cur;
312
84.1k
    return (0);
313
84.1k
}
314
315
/**
316
 * xmlParse3986Port:
317
 * @uri:  pointer to an URI structure
318
 * @str:  the string to analyze
319
 *
320
 * Parse a port part and fills in the appropriate fields
321
 * of the @uri structure
322
 *
323
 * port          = *DIGIT
324
 *
325
 * Returns 0 or the error code
326
 */
327
static int
328
xmlParse3986Port(xmlURIPtr uri, const char **str)
329
25.4k
{
330
25.4k
    const char *cur = *str;
331
25.4k
    int port = 0;
332
333
25.4k
    if (ISA_DIGIT(cur)) {
334
35.1k
  while (ISA_DIGIT(cur)) {
335
29.7k
            int digit = *cur - '0';
336
337
29.7k
            if (port > INT_MAX / 10)
338
468
                return(1);
339
29.3k
            port *= 10;
340
29.3k
            if (port > INT_MAX - digit)
341
0
                return(1);
342
29.3k
      port += digit;
343
344
29.3k
      cur++;
345
29.3k
  }
346
5.37k
  if (uri != NULL)
347
5.37k
      uri->port = port;
348
5.37k
  *str = cur;
349
5.37k
  return(0);
350
5.83k
    }
351
19.6k
    return(1);
352
25.4k
}
353
354
/**
355
 * xmlParse3986Userinfo:
356
 * @uri:  pointer to an URI structure
357
 * @str:  the string to analyze
358
 *
359
 * Parse an user information part and fills in the appropriate fields
360
 * of the @uri structure
361
 *
362
 * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
363
 *
364
 * Returns 0 or the error code
365
 */
366
static int
367
xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
368
770k
{
369
770k
    const char *cur;
370
371
770k
    cur = *str;
372
14.6M
    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
373
14.6M
           ISA_SUB_DELIM(cur) || (*cur == ':'))
374
13.8M
  NEXT(cur);
375
770k
    if (*cur == '@') {
376
41.1k
  if (uri != NULL) {
377
41.1k
      if (uri->user != NULL) xmlFree(uri->user);
378
41.1k
      if (uri->cleanup & 2)
379
0
    uri->user = STRNDUP(*str, cur - *str);
380
41.1k
      else
381
41.1k
    uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
382
41.1k
  }
383
41.1k
  *str = cur;
384
41.1k
  return(0);
385
41.1k
    }
386
729k
    return(1);
387
770k
}
388
389
/**
 * xmlParse3986DecOctet:
 * @str:  in/out cursor into the string being analyzed
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are looked at; the octet is
 * rejected when a multi-digit run starts with '0' or when its value
 * exceeds 255. The cursor is only advanced on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int ndigits = 0;
    int value = 0;

    /* collect up to three leading decimal digits; NUL stops the scan */
    while ((ndigits < 3) &&
           (cur[ndigits] >= '0') && (cur[ndigits] <= '9')) {
        value = value * 10 + (cur[ndigits] - '0');
        ndigits++;
    }

    if (ndigits == 0)
        return(1);
    /* "0" alone is fine, "01" or "012" is not part of the grammar */
    if ((ndigits > 1) && (cur[0] == '0'))
        return(1);
    if (value > 255)
        return(1);

    *str = cur + ndigits;
    return(0);
}
426
/**
427
 * xmlParse3986Host:
428
 * @uri:  pointer to an URI structure
429
 * @str:  the string to analyze
430
 *
431
 * Parse an host part and fills in the appropriate fields
432
 * of the @uri structure
433
 *
434
 * host          = IP-literal / IPv4address / reg-name
435
 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
436
 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
437
 * reg-name      = *( unreserved / pct-encoded / sub-delims )
438
 *
439
 * Returns 0 or the error code
440
 */
441
static int
442
xmlParse3986Host(xmlURIPtr uri, const char **str)
443
770k
{
444
770k
    const char *cur = *str;
445
770k
    const char *host;
446
447
770k
    host = cur;
448
    /*
449
     * IPv6 and future addressing scheme are enclosed between brackets
450
     */
451
770k
    if (*cur == '[') {
452
5.60k
        cur++;
453
662k
  while ((*cur != ']') && (*cur != 0))
454
656k
      cur++;
455
5.60k
  if (*cur != ']')
456
4.72k
      return(1);
457
878
  cur++;
458
878
  goto found;
459
5.60k
    }
460
    /*
461
     * try to parse an IPv4
462
     */
463
764k
    if (ISA_DIGIT(cur)) {
464
47.3k
        if (xmlParse3986DecOctet(&cur) != 0)
465
15.0k
      goto not_ipv4;
466
32.2k
  if (*cur != '.')
467
17.1k
      goto not_ipv4;
468
15.1k
  cur++;
469
15.1k
        if (xmlParse3986DecOctet(&cur) != 0)
470
6.95k
      goto not_ipv4;
471
8.16k
  if (*cur != '.')
472
6.95k
      goto not_ipv4;
473
1.21k
        if (xmlParse3986DecOctet(&cur) != 0)
474
1.21k
      goto not_ipv4;
475
0
  if (*cur != '.')
476
0
      goto not_ipv4;
477
0
        if (xmlParse3986DecOctet(&cur) != 0)
478
0
      goto not_ipv4;
479
0
  goto found;
480
47.3k
not_ipv4:
481
47.3k
        cur = *str;
482
47.3k
    }
483
    /*
484
     * then this should be a hostname which can be empty
485
     */
486
11.4M
    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
487
10.6M
        NEXT(cur);
488
765k
found:
489
765k
    if (uri != NULL) {
490
765k
  if (uri->authority != NULL) xmlFree(uri->authority);
491
765k
  uri->authority = NULL;
492
765k
  if (uri->server != NULL) xmlFree(uri->server);
493
765k
  if (cur != host) {
494
719k
      if (uri->cleanup & 2)
495
0
    uri->server = STRNDUP(host, cur - host);
496
719k
      else
497
719k
    uri->server = xmlURIUnescapeString(host, cur - host, NULL);
498
719k
  } else
499
45.9k
      uri->server = NULL;
500
765k
    }
501
765k
    *str = cur;
502
765k
    return(0);
503
764k
}
504
505
/**
506
 * xmlParse3986Authority:
507
 * @uri:  pointer to an URI structure
508
 * @str:  the string to analyze
509
 *
510
 * Parse an authority part and fills in the appropriate fields
511
 * of the @uri structure
512
 *
513
 * authority     = [ userinfo "@" ] host [ ":" port ]
514
 *
515
 * Returns 0 or the error code
516
 */
517
static int
518
xmlParse3986Authority(xmlURIPtr uri, const char **str)
519
770k
{
520
770k
    const char *cur;
521
770k
    int ret;
522
523
770k
    cur = *str;
524
    /*
525
     * try to parse an userinfo and check for the trailing @
526
     */
527
770k
    ret = xmlParse3986Userinfo(uri, &cur);
528
770k
    if ((ret != 0) || (*cur != '@'))
529
729k
        cur = *str;
530
41.1k
    else
531
41.1k
        cur++;
532
770k
    ret = xmlParse3986Host(uri, &cur);
533
770k
    if (ret != 0) return(ret);
534
765k
    if (*cur == ':') {
535
25.4k
        cur++;
536
25.4k
        ret = xmlParse3986Port(uri, &cur);
537
25.4k
  if (ret != 0) return(ret);
538
25.4k
    }
539
745k
    *str = cur;
540
745k
    return(0);
541
765k
}
542
543
/**
 * xmlParse3986Segment:
 * @str:  in/out cursor into the string being analyzed
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip a path segment
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * The segment stops at the first character which is not a pchar or
 * which equals @forbid. A segment whose first character is @forbid is
 * treated as empty, so it is rejected unless @empty is set. The cursor
 * is only advanced when at least one character was consumed.
 *
 * Returns 0 on success, 1 if the segment is empty and @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur;

    cur = *str;
    if (!ISA_PCHAR(cur) || (*cur == forbid)) {
        if (empty)
            return(0);
        return(1);
    }
    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);
    *str = cur;
    return (0);
}
575
576
/**
577
 * xmlParse3986PathAbEmpty:
578
 * @uri:  pointer to an URI structure
579
 * @str:  the string to analyze
580
 *
581
 * Parse an path absolute or empty and fills in the appropriate fields
582
 * of the @uri structure
583
 *
584
 * path-abempty  = *( "/" segment )
585
 *
586
 * Returns 0 or the error code
587
 */
588
static int
589
xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
590
745k
{
591
745k
    const char *cur;
592
745k
    int ret;
593
594
745k
    cur = *str;
595
596
2.26M
    while (*cur == '/') {
597
1.51M
        cur++;
598
1.51M
  ret = xmlParse3986Segment(&cur, 0, 1);
599
1.51M
  if (ret != 0) return(ret);
600
1.51M
    }
601
745k
    if (uri != NULL) {
602
745k
  if (uri->path != NULL) xmlFree(uri->path);
603
745k
        if (*str != cur) {
604
601k
            if (uri->cleanup & 2)
605
0
                uri->path = STRNDUP(*str, cur - *str);
606
601k
            else
607
601k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
608
601k
        } else {
609
144k
            uri->path = NULL;
610
144k
        }
611
745k
    }
612
745k
    *str = cur;
613
745k
    return (0);
614
745k
}
615
616
/**
617
 * xmlParse3986PathAbsolute:
618
 * @uri:  pointer to an URI structure
619
 * @str:  the string to analyze
620
 *
621
 * Parse an path absolute and fills in the appropriate fields
622
 * of the @uri structure
623
 *
624
 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
625
 *
626
 * Returns 0 or the error code
627
 */
628
static int
629
xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
630
88.2k
{
631
88.2k
    const char *cur;
632
88.2k
    int ret;
633
634
88.2k
    cur = *str;
635
636
88.2k
    if (*cur != '/')
637
0
        return(1);
638
88.2k
    cur++;
639
88.2k
    ret = xmlParse3986Segment(&cur, 0, 0);
640
88.2k
    if (ret == 0) {
641
1.36M
  while (*cur == '/') {
642
1.28M
      cur++;
643
1.28M
      ret = xmlParse3986Segment(&cur, 0, 1);
644
1.28M
      if (ret != 0) return(ret);
645
1.28M
  }
646
76.0k
    }
647
88.2k
    if (uri != NULL) {
648
88.2k
  if (uri->path != NULL) xmlFree(uri->path);
649
88.2k
        if (cur != *str) {
650
88.2k
            if (uri->cleanup & 2)
651
0
                uri->path = STRNDUP(*str, cur - *str);
652
88.2k
            else
653
88.2k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
654
88.2k
        } else {
655
0
            uri->path = NULL;
656
0
        }
657
88.2k
    }
658
88.2k
    *str = cur;
659
88.2k
    return (0);
660
88.2k
}
661
662
/**
663
 * xmlParse3986PathRootless:
664
 * @uri:  pointer to an URI structure
665
 * @str:  the string to analyze
666
 *
667
 * Parse an path without root and fills in the appropriate fields
668
 * of the @uri structure
669
 *
670
 * path-rootless = segment-nz *( "/" segment )
671
 *
672
 * Returns 0 or the error code
673
 */
674
static int
675
xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
676
162k
{
677
162k
    const char *cur;
678
162k
    int ret;
679
680
162k
    cur = *str;
681
682
162k
    ret = xmlParse3986Segment(&cur, 0, 0);
683
162k
    if (ret != 0) return(ret);
684
270k
    while (*cur == '/') {
685
107k
        cur++;
686
107k
  ret = xmlParse3986Segment(&cur, 0, 1);
687
107k
  if (ret != 0) return(ret);
688
107k
    }
689
162k
    if (uri != NULL) {
690
162k
  if (uri->path != NULL) xmlFree(uri->path);
691
162k
        if (cur != *str) {
692
162k
            if (uri->cleanup & 2)
693
0
                uri->path = STRNDUP(*str, cur - *str);
694
162k
            else
695
162k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
696
162k
        } else {
697
0
            uri->path = NULL;
698
0
        }
699
162k
    }
700
162k
    *str = cur;
701
162k
    return (0);
702
162k
}
703
704
/**
705
 * xmlParse3986PathNoScheme:
706
 * @uri:  pointer to an URI structure
707
 * @str:  the string to analyze
708
 *
709
 * Parse an path which is not a scheme and fills in the appropriate fields
710
 * of the @uri structure
711
 *
712
 * path-noscheme = segment-nz-nc *( "/" segment )
713
 *
714
 * Returns 0 or the error code
715
 */
716
static int
717
xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
718
8.49M
{
719
8.49M
    const char *cur;
720
8.49M
    int ret;
721
722
8.49M
    cur = *str;
723
724
8.49M
    ret = xmlParse3986Segment(&cur, ':', 0);
725
8.49M
    if (ret != 0) return(ret);
726
24.8M
    while (*cur == '/') {
727
16.3M
        cur++;
728
16.3M
  ret = xmlParse3986Segment(&cur, 0, 1);
729
16.3M
  if (ret != 0) return(ret);
730
16.3M
    }
731
8.49M
    if (uri != NULL) {
732
8.49M
  if (uri->path != NULL) xmlFree(uri->path);
733
8.49M
        if (cur != *str) {
734
8.47M
            if (uri->cleanup & 2)
735
0
                uri->path = STRNDUP(*str, cur - *str);
736
8.47M
            else
737
8.47M
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
738
8.47M
        } else {
739
16.7k
            uri->path = NULL;
740
16.7k
        }
741
8.49M
    }
742
8.49M
    *str = cur;
743
8.49M
    return (0);
744
8.49M
}
745
746
/**
747
 * xmlParse3986HierPart:
748
 * @uri:  pointer to an URI structure
749
 * @str:  the string to analyze
750
 *
751
 * Parse an hierarchical part and fills in the appropriate fields
752
 * of the @uri structure
753
 *
754
 * hier-part     = "//" authority path-abempty
755
 *                / path-absolute
756
 *                / path-rootless
757
 *                / path-empty
758
 *
759
 * Returns 0 or the error code
760
 */
761
static int
762
xmlParse3986HierPart(xmlURIPtr uri, const char **str)
763
935k
{
764
935k
    const char *cur;
765
935k
    int ret;
766
767
935k
    cur = *str;
768
769
935k
    if ((*cur == '/') && (*(cur + 1) == '/')) {
770
746k
        cur += 2;
771
746k
  ret = xmlParse3986Authority(uri, &cur);
772
746k
  if (ret != 0) return(ret);
773
722k
  if (uri->server == NULL)
774
28.1k
      uri->port = -1;
775
722k
  ret = xmlParse3986PathAbEmpty(uri, &cur);
776
722k
  if (ret != 0) return(ret);
777
722k
  *str = cur;
778
722k
  return(0);
779
722k
    } else if (*cur == '/') {
780
13.4k
        ret = xmlParse3986PathAbsolute(uri, &cur);
781
13.4k
  if (ret != 0) return(ret);
782
174k
    } else if (ISA_PCHAR(cur)) {
783
162k
        ret = xmlParse3986PathRootless(uri, &cur);
784
162k
  if (ret != 0) return(ret);
785
162k
    } else {
786
  /* path-empty is effectively empty */
787
12.1k
  if (uri != NULL) {
788
12.1k
      if (uri->path != NULL) xmlFree(uri->path);
789
12.1k
      uri->path = NULL;
790
12.1k
  }
791
12.1k
    }
792
188k
    *str = cur;
793
188k
    return (0);
794
935k
}
795
796
/**
797
 * xmlParse3986RelativeRef:
798
 * @uri:  pointer to an URI structure
799
 * @str:  the string to analyze
800
 *
801
 * Parse an URI string and fills in the appropriate fields
802
 * of the @uri structure
803
 *
804
 * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
805
 * relative-part = "//" authority path-abempty
806
 *               / path-absolute
807
 *               / path-noscheme
808
 *               / path-empty
809
 *
810
 * Returns 0 or the error code
811
 */
812
static int
813
9.32M
xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
814
9.32M
    int ret;
815
816
9.32M
    if ((*str == '/') && (*(str + 1) == '/')) {
817
23.3k
        str += 2;
818
23.3k
  ret = xmlParse3986Authority(uri, &str);
819
23.3k
  if (ret != 0) return(ret);
820
22.8k
  ret = xmlParse3986PathAbEmpty(uri, &str);
821
22.8k
  if (ret != 0) return(ret);
822
9.30M
    } else if (*str == '/') {
823
74.7k
  ret = xmlParse3986PathAbsolute(uri, &str);
824
74.7k
  if (ret != 0) return(ret);
825
9.23M
    } else if (ISA_PCHAR(str)) {
826
8.49M
        ret = xmlParse3986PathNoScheme(uri, &str);
827
8.49M
  if (ret != 0) return(ret);
828
8.49M
    } else {
829
  /* path-empty is effectively empty */
830
737k
  if (uri != NULL) {
831
737k
      if (uri->path != NULL) xmlFree(uri->path);
832
737k
      uri->path = NULL;
833
737k
  }
834
737k
    }
835
836
9.32M
    if (*str == '?') {
837
66.8k
  str++;
838
66.8k
  ret = xmlParse3986Query(uri, &str);
839
66.8k
  if (ret != 0) return(ret);
840
66.8k
    }
841
9.32M
    if (*str == '#') {
842
635k
  str++;
843
635k
  ret = xmlParse3986Fragment(uri, &str);
844
635k
  if (ret != 0) return(ret);
845
635k
    }
846
9.32M
    if (*str != 0) {
847
1.11M
  xmlCleanURI(uri);
848
1.11M
  return(1);
849
1.11M
    }
850
8.20M
    return(0);
851
9.32M
}
852
853
854
/**
855
 * xmlParse3986URI:
856
 * @uri:  pointer to an URI structure
857
 * @str:  the string to analyze
858
 *
859
 * Parse an URI string and fills in the appropriate fields
860
 * of the @uri structure
861
 *
862
 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
863
 *
864
 * Returns 0 or the error code
865
 */
866
static int
867
10.0M
xmlParse3986URI(xmlURIPtr uri, const char *str) {
868
10.0M
    int ret;
869
870
10.0M
    ret = xmlParse3986Scheme(uri, &str);
871
10.0M
    if (ret != 0) return(ret);
872
7.97M
    if (*str != ':') {
873
7.03M
  return(1);
874
7.03M
    }
875
935k
    str++;
876
935k
    ret = xmlParse3986HierPart(uri, &str);
877
935k
    if (ret != 0) return(ret);
878
910k
    if (*str == '?') {
879
17.3k
  str++;
880
17.3k
  ret = xmlParse3986Query(uri, &str);
881
17.3k
  if (ret != 0) return(ret);
882
17.3k
    }
883
910k
    if (*str == '#') {
884
23.5k
  str++;
885
23.5k
  ret = xmlParse3986Fragment(uri, &str);
886
23.5k
  if (ret != 0) return(ret);
887
23.5k
    }
888
910k
    if (*str != 0) {
889
145k
  xmlCleanURI(uri);
890
145k
  return(1);
891
145k
    }
892
765k
    return(0);
893
910k
}
894
895
/**
896
 * xmlParse3986URIReference:
897
 * @uri:  pointer to an URI structure
898
 * @str:  the string to analyze
899
 *
900
 * Parse an URI reference string and fills in the appropriate fields
901
 * of the @uri structure
902
 *
903
 * URI-reference = URI / relative-ref
904
 *
905
 * Returns 0 or the error code
906
 */
907
static int
908
10.0M
xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
909
10.0M
    int ret;
910
911
10.0M
    if (str == NULL)
912
0
  return(-1);
913
10.0M
    xmlCleanURI(uri);
914
915
    /*
916
     * Try first to parse absolute refs, then fallback to relative if
917
     * it fails.
918
     */
919
10.0M
    ret = xmlParse3986URI(uri, str);
920
10.0M
    if (ret != 0) {
921
9.32M
  xmlCleanURI(uri);
922
9.32M
        ret = xmlParse3986RelativeRef(uri, str);
923
9.32M
  if (ret != 0) {
924
1.12M
      xmlCleanURI(uri);
925
1.12M
      return(ret);
926
1.12M
  }
927
9.32M
    }
928
8.97M
    return(0);
929
10.0M
}
930
931
/**
932
 * xmlParseURI:
933
 * @str:  the URI string to analyze
934
 *
935
 * Parse an URI based on RFC 3986
936
 *
937
 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
938
 *
939
 * Returns a newly built xmlURIPtr or NULL in case of error
940
 */
941
xmlURIPtr
942
6.16M
xmlParseURI(const char *str) {
943
6.16M
    xmlURIPtr uri;
944
6.16M
    int ret;
945
946
6.16M
    if (str == NULL)
947
0
  return(NULL);
948
6.16M
    uri = xmlCreateURI();
949
6.16M
    if (uri != NULL) {
950
6.16M
  ret = xmlParse3986URIReference(uri, str);
951
6.16M
        if (ret) {
952
980k
      xmlFreeURI(uri);
953
980k
      return(NULL);
954
980k
  }
955
6.16M
    }
956
5.18M
    return(uri);
957
6.16M
}
958
959
/**
960
 * xmlParseURIReference:
961
 * @uri:  pointer to an URI structure
962
 * @str:  the string to analyze
963
 *
964
 * Parse an URI reference string based on RFC 3986 and fills in the
965
 * appropriate fields of the @uri structure
966
 *
967
 * URI-reference = URI / relative-ref
968
 *
969
 * Returns 0 or the error code
970
 */
971
int
972
3.93M
xmlParseURIReference(xmlURIPtr uri, const char *str) {
973
3.93M
    return(xmlParse3986URIReference(uri, str));
974
3.93M
}
975
976
/**
977
 * xmlParseURIRaw:
978
 * @str:  the URI string to analyze
979
 * @raw:  if 1 unescaping of URI pieces are disabled
980
 *
981
 * Parse an URI but allows to keep intact the original fragments.
982
 *
983
 * URI-reference = URI / relative-ref
984
 *
985
 * Returns a newly built xmlURIPtr or NULL in case of error
986
 */
987
xmlURIPtr
988
0
xmlParseURIRaw(const char *str, int raw) {
989
0
    xmlURIPtr uri;
990
0
    int ret;
991
992
0
    if (str == NULL)
993
0
  return(NULL);
994
0
    uri = xmlCreateURI();
995
0
    if (uri != NULL) {
996
0
        if (raw) {
997
0
      uri->cleanup |= 2;
998
0
  }
999
0
  ret = xmlParseURIReference(uri, str);
1000
0
        if (ret) {
1001
0
      xmlFreeURI(uri);
1002
0
      return(NULL);
1003
0
  }
1004
0
    }
1005
0
    return(uri);
1006
0
}
1007
1008
/************************************************************************
1009
 *                  *
1010
 *      Generic URI structure functions     *
1011
 *                  *
1012
 ************************************************************************/
1013
1014
/**
1015
 * xmlCreateURI:
1016
 *
1017
 * Simply creates an empty xmlURI
1018
 *
1019
 * Returns the new structure or NULL in case of error
1020
 */
1021
xmlURIPtr
1022
10.5M
xmlCreateURI(void) {
1023
10.5M
    xmlURIPtr ret;
1024
1025
10.5M
    ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1026
10.5M
    if (ret == NULL) {
1027
0
        xmlURIErrMemory("creating URI structure\n");
1028
0
  return(NULL);
1029
0
    }
1030
10.5M
    memset(ret, 0, sizeof(xmlURI));
1031
10.5M
    return(ret);
1032
10.5M
}
1033
1034
/**
1035
 * xmlSaveUriRealloc:
1036
 *
1037
 * Function to handle properly a reallocation when saving an URI
1038
 * Also imposes some limit on the length of an URI string output
1039
 */
1040
static xmlChar *
1041
2.11M
xmlSaveUriRealloc(xmlChar *ret, int *max) {
1042
2.11M
    xmlChar *temp;
1043
2.11M
    int tmp;
1044
1045
2.11M
    if (*max > MAX_URI_LENGTH) {
1046
0
        xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1047
0
        return(NULL);
1048
0
    }
1049
2.11M
    tmp = *max * 2;
1050
2.11M
    temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1051
2.11M
    if (temp == NULL) {
1052
0
        xmlURIErrMemory("saving URI\n");
1053
0
        return(NULL);
1054
0
    }
1055
2.11M
    *max = tmp;
1056
2.11M
    return(temp);
1057
2.11M
}
1058
1059
/**
1060
 * xmlSaveUri:
1061
 * @uri:  pointer to an xmlURI
1062
 *
1063
 * Save the URI as an escaped string
1064
 *
1065
 * Returns a new string (to be deallocated by caller)
1066
 */
1067
xmlChar *
1068
4.22M
xmlSaveUri(xmlURIPtr uri) {
1069
4.22M
    xmlChar *ret = NULL;
1070
4.22M
    xmlChar *temp;
1071
4.22M
    const char *p;
1072
4.22M
    int len;
1073
4.22M
    int max;
1074
1075
4.22M
    if (uri == NULL) return(NULL);
1076
1077
1078
4.22M
    max = 80;
1079
4.22M
    ret = (xmlChar *) xmlMallocAtomic(max + 1);
1080
4.22M
    if (ret == NULL) {
1081
0
        xmlURIErrMemory("saving URI\n");
1082
0
  return(NULL);
1083
0
    }
1084
4.22M
    len = 0;
1085
1086
4.22M
    if (uri->scheme != NULL) {
1087
77.4k
  p = uri->scheme;
1088
1.07M
  while (*p != 0) {
1089
994k
      if (len >= max) {
1090
3.90k
                temp = xmlSaveUriRealloc(ret, &max);
1091
3.90k
                if (temp == NULL) goto mem_error;
1092
3.90k
    ret = temp;
1093
3.90k
      }
1094
994k
      ret[len++] = *p++;
1095
994k
  }
1096
77.4k
  if (len >= max) {
1097
962
            temp = xmlSaveUriRealloc(ret, &max);
1098
962
            if (temp == NULL) goto mem_error;
1099
962
            ret = temp;
1100
962
  }
1101
77.4k
  ret[len++] = ':';
1102
77.4k
    }
1103
4.22M
    if (uri->opaque != NULL) {
1104
0
  p = uri->opaque;
1105
0
  while (*p != 0) {
1106
0
      if (len + 3 >= max) {
1107
0
                temp = xmlSaveUriRealloc(ret, &max);
1108
0
                if (temp == NULL) goto mem_error;
1109
0
                ret = temp;
1110
0
      }
1111
0
      if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1112
0
    ret[len++] = *p++;
1113
0
      else {
1114
0
    int val = *(unsigned char *)p++;
1115
0
    int hi = val / 0x10, lo = val % 0x10;
1116
0
    ret[len++] = '%';
1117
0
    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1118
0
    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1119
0
      }
1120
0
  }
1121
4.22M
    } else {
1122
4.22M
  if ((uri->server != NULL) || (uri->port == -1)) {
1123
51.4k
      if (len + 3 >= max) {
1124
358
                temp = xmlSaveUriRealloc(ret, &max);
1125
358
                if (temp == NULL) goto mem_error;
1126
358
                ret = temp;
1127
358
      }
1128
51.4k
      ret[len++] = '/';
1129
51.4k
      ret[len++] = '/';
1130
51.4k
      if (uri->user != NULL) {
1131
18.9k
    p = uri->user;
1132
2.17M
    while (*p != 0) {
1133
2.15M
        if (len + 3 >= max) {
1134
11.4k
                        temp = xmlSaveUriRealloc(ret, &max);
1135
11.4k
                        if (temp == NULL) goto mem_error;
1136
11.4k
                        ret = temp;
1137
11.4k
        }
1138
2.15M
        if ((IS_UNRESERVED(*(p))) ||
1139
2.15M
      ((*(p) == ';')) || ((*(p) == ':')) ||
1140
2.15M
      ((*(p) == '&')) || ((*(p) == '=')) ||
1141
2.15M
      ((*(p) == '+')) || ((*(p) == '$')) ||
1142
2.15M
      ((*(p) == ',')))
1143
1.66M
      ret[len++] = *p++;
1144
488k
        else {
1145
488k
      int val = *(unsigned char *)p++;
1146
488k
      int hi = val / 0x10, lo = val % 0x10;
1147
488k
      ret[len++] = '%';
1148
488k
      ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1149
488k
      ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1150
488k
        }
1151
2.15M
    }
1152
18.9k
    if (len + 3 >= max) {
1153
805
                    temp = xmlSaveUriRealloc(ret, &max);
1154
805
                    if (temp == NULL) goto mem_error;
1155
805
                    ret = temp;
1156
805
    }
1157
18.9k
    ret[len++] = '@';
1158
18.9k
      }
1159
51.4k
      if (uri->server != NULL) {
1160
42.1k
    p = uri->server;
1161
1.35M
    while (*p != 0) {
1162
1.30M
        if (len >= max) {
1163
4.32k
      temp = xmlSaveUriRealloc(ret, &max);
1164
4.32k
      if (temp == NULL) goto mem_error;
1165
4.32k
      ret = temp;
1166
4.32k
        }
1167
1.30M
        ret[len++] = *p++;
1168
1.30M
    }
1169
42.1k
    if (uri->port > 0) {
1170
2.58k
        if (len + 10 >= max) {
1171
480
      temp = xmlSaveUriRealloc(ret, &max);
1172
480
      if (temp == NULL) goto mem_error;
1173
480
      ret = temp;
1174
480
        }
1175
2.58k
        len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1176
2.58k
    }
1177
42.1k
      }
1178
4.17M
  } else if (uri->authority != NULL) {
1179
0
      if (len + 3 >= max) {
1180
0
                temp = xmlSaveUriRealloc(ret, &max);
1181
0
                if (temp == NULL) goto mem_error;
1182
0
                ret = temp;
1183
0
      }
1184
0
      ret[len++] = '/';
1185
0
      ret[len++] = '/';
1186
0
      p = uri->authority;
1187
0
      while (*p != 0) {
1188
0
    if (len + 3 >= max) {
1189
0
                    temp = xmlSaveUriRealloc(ret, &max);
1190
0
                    if (temp == NULL) goto mem_error;
1191
0
                    ret = temp;
1192
0
    }
1193
0
    if ((IS_UNRESERVED(*(p))) ||
1194
0
                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1195
0
                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1196
0
                    ((*(p) == '=')) || ((*(p) == '+')))
1197
0
        ret[len++] = *p++;
1198
0
    else {
1199
0
        int val = *(unsigned char *)p++;
1200
0
        int hi = val / 0x10, lo = val % 0x10;
1201
0
        ret[len++] = '%';
1202
0
        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1203
0
        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1204
0
    }
1205
0
      }
1206
4.17M
  } else if (uri->scheme != NULL) {
1207
30.7k
      if (len + 3 >= max) {
1208
1.49k
                temp = xmlSaveUriRealloc(ret, &max);
1209
1.49k
                if (temp == NULL) goto mem_error;
1210
1.49k
                ret = temp;
1211
1.49k
      }
1212
30.7k
  }
1213
4.22M
  if (uri->path != NULL) {
1214
4.06M
      p = uri->path;
1215
      /*
1216
       * the colon in file:///d: should not be escaped or
1217
       * Windows accesses fail later.
1218
       */
1219
4.06M
      if ((uri->scheme != NULL) &&
1220
4.06M
    (p[0] == '/') &&
1221
4.06M
    (((p[1] >= 'a') && (p[1] <= 'z')) ||
1222
34.7k
     ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1223
4.06M
    (p[2] == ':') &&
1224
4.06M
          (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1225
300
    if (len + 3 >= max) {
1226
0
                    temp = xmlSaveUriRealloc(ret, &max);
1227
0
                    if (temp == NULL) goto mem_error;
1228
0
                    ret = temp;
1229
0
    }
1230
300
    ret[len++] = *p++;
1231
300
    ret[len++] = *p++;
1232
300
    ret[len++] = *p++;
1233
300
      }
1234
769M
      while (*p != 0) {
1235
765M
    if (len + 3 >= max) {
1236
1.98M
                    temp = xmlSaveUriRealloc(ret, &max);
1237
1.98M
                    if (temp == NULL) goto mem_error;
1238
1.98M
                    ret = temp;
1239
1.98M
    }
1240
765M
    if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1241
765M
                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1242
765M
              ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1243
765M
              ((*(p) == ',')))
1244
558M
        ret[len++] = *p++;
1245
207M
    else {
1246
207M
        int val = *(unsigned char *)p++;
1247
207M
        int hi = val / 0x10, lo = val % 0x10;
1248
207M
        ret[len++] = '%';
1249
207M
        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1250
207M
        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1251
207M
    }
1252
765M
      }
1253
4.06M
  }
1254
4.22M
  if (uri->query_raw != NULL) {
1255
17.4k
      if (len + 1 >= max) {
1256
547
                temp = xmlSaveUriRealloc(ret, &max);
1257
547
                if (temp == NULL) goto mem_error;
1258
547
                ret = temp;
1259
547
      }
1260
17.4k
      ret[len++] = '?';
1261
17.4k
      p = uri->query_raw;
1262
18.7M
      while (*p != 0) {
1263
18.7M
    if (len + 1 >= max) {
1264
11.6k
                    temp = xmlSaveUriRealloc(ret, &max);
1265
11.6k
                    if (temp == NULL) goto mem_error;
1266
11.6k
                    ret = temp;
1267
11.6k
    }
1268
18.7M
    ret[len++] = *p++;
1269
18.7M
      }
1270
4.21M
  } else if (uri->query != NULL) {
1271
0
      if (len + 3 >= max) {
1272
0
                temp = xmlSaveUriRealloc(ret, &max);
1273
0
                if (temp == NULL) goto mem_error;
1274
0
                ret = temp;
1275
0
      }
1276
0
      ret[len++] = '?';
1277
0
      p = uri->query;
1278
0
      while (*p != 0) {
1279
0
    if (len + 3 >= max) {
1280
0
                    temp = xmlSaveUriRealloc(ret, &max);
1281
0
                    if (temp == NULL) goto mem_error;
1282
0
                    ret = temp;
1283
0
    }
1284
0
    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1285
0
        ret[len++] = *p++;
1286
0
    else {
1287
0
        int val = *(unsigned char *)p++;
1288
0
        int hi = val / 0x10, lo = val % 0x10;
1289
0
        ret[len++] = '%';
1290
0
        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1291
0
        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1292
0
    }
1293
0
      }
1294
0
  }
1295
4.22M
    }
1296
4.22M
    if (uri->fragment != NULL) {
1297
252k
  if (len + 3 >= max) {
1298
1.86k
            temp = xmlSaveUriRealloc(ret, &max);
1299
1.86k
            if (temp == NULL) goto mem_error;
1300
1.86k
            ret = temp;
1301
1.86k
  }
1302
252k
  ret[len++] = '#';
1303
252k
  p = uri->fragment;
1304
9.50M
  while (*p != 0) {
1305
9.25M
      if (len + 3 >= max) {
1306
38.1k
                temp = xmlSaveUriRealloc(ret, &max);
1307
38.1k
                if (temp == NULL) goto mem_error;
1308
38.1k
                ret = temp;
1309
38.1k
      }
1310
9.25M
      if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1311
9.13M
    ret[len++] = *p++;
1312
119k
      else {
1313
119k
    int val = *(unsigned char *)p++;
1314
119k
    int hi = val / 0x10, lo = val % 0x10;
1315
119k
    ret[len++] = '%';
1316
119k
    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1317
119k
    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1318
119k
      }
1319
9.25M
  }
1320
252k
    }
1321
4.22M
    if (len >= max) {
1322
447
        temp = xmlSaveUriRealloc(ret, &max);
1323
447
        if (temp == NULL) goto mem_error;
1324
447
        ret = temp;
1325
447
    }
1326
4.22M
    ret[len] = 0;
1327
4.22M
    return(ret);
1328
1329
0
mem_error:
1330
0
    xmlFree(ret);
1331
0
    return(NULL);
1332
4.22M
}
1333
1334
/**
1335
 * xmlPrintURI:
1336
 * @stream:  a FILE* for the output
1337
 * @uri:  pointer to an xmlURI
1338
 *
1339
 * Prints the URI in the stream @stream.
1340
 */
1341
void
1342
0
xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1343
0
    xmlChar *out;
1344
1345
0
    out = xmlSaveUri(uri);
1346
0
    if (out != NULL) {
1347
0
  fprintf(stream, "%s", (char *) out);
1348
0
  xmlFree(out);
1349
0
    }
1350
0
}
1351
1352
/**
1353
 * xmlCleanURI:
1354
 * @uri:  pointer to an xmlURI
1355
 *
1356
 * Make sure the xmlURI struct is free of content
1357
 */
1358
static void
1359
21.8M
xmlCleanURI(xmlURIPtr uri) {
1360
21.8M
    if (uri == NULL) return;
1361
1362
21.8M
    if (uri->scheme != NULL) xmlFree(uri->scheme);
1363
21.8M
    uri->scheme = NULL;
1364
21.8M
    if (uri->server != NULL) xmlFree(uri->server);
1365
21.8M
    uri->server = NULL;
1366
21.8M
    if (uri->user != NULL) xmlFree(uri->user);
1367
21.8M
    uri->user = NULL;
1368
21.8M
    if (uri->path != NULL) xmlFree(uri->path);
1369
21.8M
    uri->path = NULL;
1370
21.8M
    if (uri->fragment != NULL) xmlFree(uri->fragment);
1371
21.8M
    uri->fragment = NULL;
1372
21.8M
    if (uri->opaque != NULL) xmlFree(uri->opaque);
1373
21.8M
    uri->opaque = NULL;
1374
21.8M
    if (uri->authority != NULL) xmlFree(uri->authority);
1375
21.8M
    uri->authority = NULL;
1376
21.8M
    if (uri->query != NULL) xmlFree(uri->query);
1377
21.8M
    uri->query = NULL;
1378
21.8M
    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1379
21.8M
    uri->query_raw = NULL;
1380
21.8M
}
1381
1382
/**
1383
 * xmlFreeURI:
1384
 * @uri:  pointer to an xmlURI
1385
 *
1386
 * Free up the xmlURI struct
1387
 */
1388
void
1389
10.5M
xmlFreeURI(xmlURIPtr uri) {
1390
10.5M
    if (uri == NULL) return;
1391
1392
10.5M
    if (uri->scheme != NULL) xmlFree(uri->scheme);
1393
10.5M
    if (uri->server != NULL) xmlFree(uri->server);
1394
10.5M
    if (uri->user != NULL) xmlFree(uri->user);
1395
10.5M
    if (uri->path != NULL) xmlFree(uri->path);
1396
10.5M
    if (uri->fragment != NULL) xmlFree(uri->fragment);
1397
10.5M
    if (uri->opaque != NULL) xmlFree(uri->opaque);
1398
10.5M
    if (uri->authority != NULL) xmlFree(uri->authority);
1399
10.5M
    if (uri->query != NULL) xmlFree(uri->query);
1400
10.5M
    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1401
10.5M
    xmlFree(uri);
1402
10.5M
}
1403
1404
/************************************************************************
1405
 *                  *
1406
 *      Helper functions        *
1407
 *                  *
1408
 ************************************************************************/
1409
1410
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of "/" inside the path are
 * collapsed to a single "/" as well; leading "/" characters are kept.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The regions overlap, so copy by hand instead of using strcpy. */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Step back over the "/" characters in front of "cur". If only
         * slashes (or nothing) precede it there is no previous segment,
         * so keep going from here.
         */
        segp = cur;
        while ((segp > path) && (segp[-1] == '/'))
            --segp;
        if (segp == path)
            continue;

        /* "segp" is now one past the end of the previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".  Testing "segp" before stepping onto the
         * segment keeps a one character segment at the very start of the
         * buffer (as in "a/b/../..") from being mistaken for "no previous
         * segment".
         */
        cur = segp - 1;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1598
1599
426M
/* Returns 1 if c is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0. */
static int is_hex(char c) {
    int digit = (c >= '0') && (c <= '9');
    int lower = (c >= 'a') && (c <= 'f');
    int upper = (c >= 'A') && (c <= 'F');

    return(digit || lower || upper);
}
1606
1607
/*
 * xmlURIHexDigitValue:
 *
 * Returns the numeric value (0-15) of a hexadecimal digit, or 0 for any
 * other character.
 */
static int
xmlURIHexDigitValue(char c) {
    if ((c >= '0') && (c <= '9'))
        return(c - '0');
    if ((c >= 'a') && (c <= 'f'))
        return((c - 'a') + 10);
    if ((c >= 'A') && (c <= 'F'))
        return((c - 'A') + 10);
    return(0);
}

/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. Each
 * "%XX" sequence made of two hexadecimal digits is replaced by the byte it
 * encodes (no encoding is applied); everything else, including a "%" that
 * is not followed by two hexadecimal digits, is copied unchanged.
 * The result is never longer than the input. When @target is NULL a new
 * buffer of @len + 1 bytes is allocated, otherwise the output is written
 * to @target.
 *
 * Returns the unescaped, zero terminated string (either @target or a new
 * allocation), or NULL in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    const char *src;
    char *result, *dst;
    int remaining;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len < 0) return(NULL);

    result = target;
    if (result == NULL) {
        result = (char *) xmlMallocAtomic(len + 1);
        if (result == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = result;
    remaining = len;
    while (remaining > 0) {
        /* A complete escape needs three input bytes: '%' and two digits. */
        if ((remaining > 2) && (src[0] == '%') &&
            (is_hex(src[1])) && (is_hex(src[2]))) {
            int value = xmlURIHexDigitValue(src[1]) * 16 +
                        xmlURIHexDigitValue(src[2]);

            /* Explicit sign change */
            *dst++ = (char) value;
            src += 3;
            remaining -= 3;
        } else {
            *dst++ = *src++;
            remaining--;
        }
    }
    *dst = 0;
    return(result);
}
1670
1671
/**
1672
 * xmlURIEscapeStr:
1673
 * @str:  string to escape
1674
 * @list: exception list string of chars not to escape
1675
 *
1676
 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1677
 * and the characters in the exception list.
1678
 *
1679
 * Returns a new escaped string or NULL in case of error.
1680
 */
1681
xmlChar *
1682
129k
xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1683
129k
    xmlChar *ret, ch;
1684
129k
    xmlChar *temp;
1685
129k
    const xmlChar *in;
1686
129k
    int len, out;
1687
1688
129k
    if (str == NULL)
1689
0
  return(NULL);
1690
129k
    if (str[0] == 0)
1691
379
  return(xmlStrdup(str));
1692
129k
    len = xmlStrlen(str);
1693
129k
    if (!(len > 0)) return(NULL);
1694
1695
129k
    len += 20;
1696
129k
    ret = (xmlChar *) xmlMallocAtomic(len);
1697
129k
    if (ret == NULL) {
1698
0
        xmlURIErrMemory("escaping URI value\n");
1699
0
  return(NULL);
1700
0
    }
1701
129k
    in = (const xmlChar *) str;
1702
129k
    out = 0;
1703
41.3M
    while(*in != 0) {
1704
41.2M
  if (len - out <= 3) {
1705
54.9k
            temp = xmlSaveUriRealloc(ret, &len);
1706
54.9k
      if (temp == NULL) {
1707
0
                xmlURIErrMemory("escaping URI value\n");
1708
0
    xmlFree(ret);
1709
0
    return(NULL);
1710
0
      }
1711
54.9k
      ret = temp;
1712
54.9k
  }
1713
1714
41.2M
  ch = *in;
1715
1716
41.2M
  if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1717
10.4M
      unsigned char val;
1718
10.4M
      ret[out++] = '%';
1719
10.4M
      val = ch >> 4;
1720
10.4M
      if (val <= 9)
1721
9.85M
    ret[out++] = '0' + val;
1722
636k
      else
1723
636k
    ret[out++] = 'A' + val - 0xA;
1724
10.4M
      val = ch & 0xF;
1725
10.4M
      if (val <= 9)
1726
6.69M
    ret[out++] = '0' + val;
1727
3.79M
      else
1728
3.79M
    ret[out++] = 'A' + val - 0xA;
1729
10.4M
      in++;
1730
30.7M
  } else {
1731
30.7M
      ret[out++] = *in++;
1732
30.7M
  }
1733
1734
41.2M
    }
1735
129k
    ret[out] = 0;
1736
129k
    return(ret);
1737
129k
}
1738
1739
/**
1740
 * xmlURIEscape:
1741
 * @str:  the string of the URI to escape
1742
 *
1743
 * Escaping routine, does not do validity checks !
1744
 * It will try to escape the chars needing this, but this is heuristic
1745
 * based it's impossible to be sure.
1746
 *
1747
 * Returns an copy of the string, but escaped
1748
 *
1749
 * 25 May 2001
1750
 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1751
 * according to RFC2396.
1752
 *   - Carl Douglas
1753
 */
1754
xmlChar *
1755
xmlURIEscape(const xmlChar * str)
1756
120k
{
1757
120k
    xmlChar *ret, *segment = NULL;
1758
120k
    xmlURIPtr uri;
1759
120k
    int ret2;
1760
1761
120k
    if (str == NULL)
1762
19.1k
        return (NULL);
1763
1764
101k
    uri = xmlCreateURI();
1765
101k
    if (uri != NULL) {
1766
  /*
1767
   * Allow escaping errors in the unescaped form
1768
   */
1769
101k
        uri->cleanup = 1;
1770
101k
        ret2 = xmlParseURIReference(uri, (const char *)str);
1771
101k
        if (ret2) {
1772
6.56k
            xmlFreeURI(uri);
1773
6.56k
            return (NULL);
1774
6.56k
        }
1775
101k
    }
1776
1777
95.0k
    if (!uri)
1778
0
        return NULL;
1779
1780
95.0k
    ret = NULL;
1781
1782
98.0k
#define NULLCHK(p) if(!p) { \
1783
0
         xmlURIErrMemory("escaping URI value\n"); \
1784
0
         xmlFreeURI(uri); \
1785
0
         xmlFree(ret); \
1786
0
         return NULL; } \
1787
95.0k
1788
95.0k
    if (uri->scheme) {
1789
2.80k
        segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1790
2.80k
        NULLCHK(segment)
1791
2.80k
        ret = xmlStrcat(ret, segment);
1792
2.80k
        ret = xmlStrcat(ret, BAD_CAST ":");
1793
2.80k
        xmlFree(segment);
1794
2.80k
    }
1795
1796
95.0k
    if (uri->authority) {
1797
0
        segment =
1798
0
            xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1799
0
        NULLCHK(segment)
1800
0
        ret = xmlStrcat(ret, BAD_CAST "//");
1801
0
        ret = xmlStrcat(ret, segment);
1802
0
        xmlFree(segment);
1803
0
    }
1804
1805
95.0k
    if (uri->user) {
1806
740
        segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1807
740
        NULLCHK(segment)
1808
740
        ret = xmlStrcat(ret,BAD_CAST "//");
1809
740
        ret = xmlStrcat(ret, segment);
1810
740
        ret = xmlStrcat(ret, BAD_CAST "@");
1811
740
        xmlFree(segment);
1812
740
    }
1813
1814
95.0k
    if (uri->server) {
1815
1.38k
        segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1816
1.38k
        NULLCHK(segment)
1817
1.38k
        if (uri->user == NULL)
1818
874
            ret = xmlStrcat(ret, BAD_CAST "//");
1819
1.38k
        ret = xmlStrcat(ret, segment);
1820
1.38k
        xmlFree(segment);
1821
1.38k
    }
1822
1823
95.0k
    if (uri->port) {
1824
739
        xmlChar port[10];
1825
1826
739
        snprintf((char *) port, 10, "%d", uri->port);
1827
739
        ret = xmlStrcat(ret, BAD_CAST ":");
1828
739
        ret = xmlStrcat(ret, port);
1829
739
    }
1830
1831
95.0k
    if (uri->path) {
1832
42.4k
        segment =
1833
42.4k
            xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1834
42.4k
        NULLCHK(segment)
1835
42.4k
        ret = xmlStrcat(ret, segment);
1836
42.4k
        xmlFree(segment);
1837
42.4k
    }
1838
1839
95.0k
    if (uri->query_raw) {
1840
3.45k
        ret = xmlStrcat(ret, BAD_CAST "?");
1841
3.45k
        ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1842
3.45k
    }
1843
91.5k
    else if (uri->query) {
1844
0
        segment =
1845
0
            xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1846
0
        NULLCHK(segment)
1847
0
        ret = xmlStrcat(ret, BAD_CAST "?");
1848
0
        ret = xmlStrcat(ret, segment);
1849
0
        xmlFree(segment);
1850
0
    }
1851
1852
95.0k
    if (uri->opaque) {
1853
0
        segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1854
0
        NULLCHK(segment)
1855
0
        ret = xmlStrcat(ret, segment);
1856
0
        xmlFree(segment);
1857
0
    }
1858
1859
95.0k
    if (uri->fragment) {
1860
50.7k
        segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1861
50.7k
        NULLCHK(segment)
1862
50.7k
        ret = xmlStrcat(ret, BAD_CAST "#");
1863
50.7k
        ret = xmlStrcat(ret, segment);
1864
50.7k
        xmlFree(segment);
1865
50.7k
    }
1866
1867
95.0k
    xmlFreeURI(uri);
1868
95.0k
#undef NULLCHK
1869
1870
95.0k
    return (ret);
1871
95.0k
}
1872
1873
/************************************************************************
1874
 *                  *
1875
 *      Public functions        *
1876
 *                  *
1877
 ************************************************************************/
1878
1879
/**
1880
 * xmlBuildURI:
1881
 * @URI:  the URI instance found in the document
1882
 * @base:  the base value
1883
 *
1884
 * Computes he final URI of the reference done by checking that
1885
 * the given URI is valid, and building the final URI using the
1886
 * base URI. This is processed according to section 5.2 of the
1887
 * RFC 2396
1888
 *
1889
 * 5.2. Resolving Relative References to Absolute Form
1890
 *
1891
 * Returns a new URI string (to be freed by the caller) or NULL in case
1892
 *         of error.
1893
 */
1894
xmlChar *
1895
3.30M
xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1896
3.30M
    xmlChar *val = NULL;
1897
3.30M
    int ret, len, indx, cur, out;
1898
3.30M
    xmlURIPtr ref = NULL;
1899
3.30M
    xmlURIPtr bas = NULL;
1900
3.30M
    xmlURIPtr res = NULL;
1901
1902
    /*
1903
     * 1) The URI reference is parsed into the potential four components and
1904
     *    fragment identifier, as described in Section 4.3.
1905
     *
1906
     *    NOTE that a completely empty URI is treated by modern browsers
1907
     *    as a reference to "." rather than as a synonym for the current
1908
     *    URI.  Should we do that here?
1909
     */
1910
3.30M
    if (URI == NULL)
1911
8.45k
  ret = -1;
1912
3.29M
    else {
1913
3.29M
  if (*URI) {
1914
3.16M
      ref = xmlCreateURI();
1915
3.16M
      if (ref == NULL)
1916
0
    goto done;
1917
3.16M
      ret = xmlParseURIReference(ref, (const char *) URI);
1918
3.16M
  }
1919
135k
  else
1920
135k
      ret = 0;
1921
3.29M
    }
1922
3.30M
    if (ret != 0)
1923
78.9k
  goto done;
1924
3.22M
    if ((ref != NULL) && (ref->scheme != NULL)) {
1925
  /*
1926
   * The URI is absolute don't modify.
1927
   */
1928
58.6k
  val = xmlStrdup(URI);
1929
58.6k
  goto done;
1930
58.6k
    }
1931
3.16M
    if (base == NULL)
1932
2.49M
  ret = -1;
1933
669k
    else {
1934
669k
  bas = xmlCreateURI();
1935
669k
  if (bas == NULL)
1936
0
      goto done;
1937
669k
  ret = xmlParseURIReference(bas, (const char *) base);
1938
669k
    }
1939
3.16M
    if (ret != 0) {
1940
2.55M
  if (ref)
1941
2.55M
      val = xmlSaveUri(ref);
1942
2.55M
  goto done;
1943
2.55M
    }
1944
607k
    if (ref == NULL) {
1945
  /*
1946
   * the base fragment must be ignored
1947
   */
1948
133k
  if (bas->fragment != NULL) {
1949
2.68k
      xmlFree(bas->fragment);
1950
2.68k
      bas->fragment = NULL;
1951
2.68k
  }
1952
133k
  val = xmlSaveUri(bas);
1953
133k
  goto done;
1954
133k
    }
1955
1956
    /*
1957
     * 2) If the path component is empty and the scheme, authority, and
1958
     *    query components are undefined, then it is a reference to the
1959
     *    current document and we are done.  Otherwise, the reference URI's
1960
     *    query and fragment components are defined as found (or not found)
1961
     *    within the URI reference and not inherited from the base URI.
1962
     *
1963
     *    NOTE that in modern browsers, the parsing differs from the above
1964
     *    in the following aspect:  the query component is allowed to be
1965
     *    defined while still treating this as a reference to the current
1966
     *    document.
1967
     */
1968
473k
    res = xmlCreateURI();
1969
473k
    if (res == NULL)
1970
0
  goto done;
1971
473k
    if ((ref->scheme == NULL) && (ref->path == NULL) &&
1972
473k
  ((ref->authority == NULL) && (ref->server == NULL))) {
1973
120k
  if (bas->scheme != NULL)
1974
5.83k
      res->scheme = xmlMemStrdup(bas->scheme);
1975
120k
  if (bas->authority != NULL)
1976
0
      res->authority = xmlMemStrdup(bas->authority);
1977
120k
  else if ((bas->server != NULL) || (bas->port == -1)) {
1978
2.84k
      if (bas->server != NULL)
1979
2.14k
    res->server = xmlMemStrdup(bas->server);
1980
2.84k
      if (bas->user != NULL)
1981
1.14k
    res->user = xmlMemStrdup(bas->user);
1982
2.84k
      res->port = bas->port;
1983
2.84k
  }
1984
120k
  if (bas->path != NULL)
1985
115k
      res->path = xmlMemStrdup(bas->path);
1986
120k
  if (ref->query_raw != NULL)
1987
741
      res->query_raw = xmlMemStrdup (ref->query_raw);
1988
120k
  else if (ref->query != NULL)
1989
0
      res->query = xmlMemStrdup(ref->query);
1990
120k
  else if (bas->query_raw != NULL)
1991
3.12k
      res->query_raw = xmlMemStrdup(bas->query_raw);
1992
117k
  else if (bas->query != NULL)
1993
0
      res->query = xmlMemStrdup(bas->query);
1994
120k
  if (ref->fragment != NULL)
1995
120k
      res->fragment = xmlMemStrdup(ref->fragment);
1996
120k
  goto step_7;
1997
120k
    }
1998
1999
    /*
2000
     * 3) If the scheme component is defined, indicating that the reference
2001
     *    starts with a scheme name, then the reference is interpreted as an
2002
     *    absolute URI and we are done.  Otherwise, the reference URI's
2003
     *    scheme is inherited from the base URI's scheme component.
2004
     */
2005
352k
    if (ref->scheme != NULL) {
2006
0
  val = xmlSaveUri(ref);
2007
0
  goto done;
2008
0
    }
2009
352k
    if (bas->scheme != NULL)
2010
32.0k
  res->scheme = xmlMemStrdup(bas->scheme);
2011
2012
352k
    if (ref->query_raw != NULL)
2013
1.81k
  res->query_raw = xmlMemStrdup(ref->query_raw);
2014
350k
    else if (ref->query != NULL)
2015
0
  res->query = xmlMemStrdup(ref->query);
2016
352k
    if (ref->fragment != NULL)
2017
36.0k
  res->fragment = xmlMemStrdup(ref->fragment);
2018
2019
    /*
2020
     * 4) If the authority component is defined, then the reference is a
2021
     *    network-path and we skip to step 7.  Otherwise, the reference
2022
     *    URI's authority is inherited from the base URI's authority
2023
     *    component, which will also be undefined if the URI scheme does not
2024
     *    use an authority component.
2025
     */
2026
352k
    if ((ref->authority != NULL) || (ref->server != NULL)) {
2027
580
  if (ref->authority != NULL)
2028
0
      res->authority = xmlMemStrdup(ref->authority);
2029
580
  else {
2030
580
      res->server = xmlMemStrdup(ref->server);
2031
580
      if (ref->user != NULL)
2032
173
    res->user = xmlMemStrdup(ref->user);
2033
580
            res->port = ref->port;
2034
580
  }
2035
580
  if (ref->path != NULL)
2036
207
      res->path = xmlMemStrdup(ref->path);
2037
580
  goto step_7;
2038
580
    }
2039
351k
    if (bas->authority != NULL)
2040
0
  res->authority = xmlMemStrdup(bas->authority);
2041
351k
    else if ((bas->server != NULL) || (bas->port == -1)) {
2042
20.8k
  if (bas->server != NULL)
2043
14.6k
      res->server = xmlMemStrdup(bas->server);
2044
20.8k
  if (bas->user != NULL)
2045
8.34k
      res->user = xmlMemStrdup(bas->user);
2046
20.8k
  res->port = bas->port;
2047
20.8k
    }
2048
2049
    /*
2050
     * 5) If the path component begins with a slash character ("/"), then
2051
     *    the reference is an absolute-path and we skip to step 7.
2052
     */
2053
351k
    if ((ref->path != NULL) && (ref->path[0] == '/')) {
2054
959
  res->path = xmlMemStrdup(ref->path);
2055
959
  goto step_7;
2056
959
    }
2057
2058
2059
    /*
2060
     * 6) If this step is reached, then we are resolving a relative-path
2061
     *    reference.  The relative path needs to be merged with the base
2062
     *    URI's path.  Although there are many ways to do this, we will
2063
     *    describe a simple method using a separate string buffer.
2064
     *
2065
     * Allocate a buffer large enough for the result string.
2066
     */
2067
350k
    len = 2; /* extra / and 0 */
2068
350k
    if (ref->path != NULL)
2069
350k
  len += strlen(ref->path);
2070
350k
    if (bas->path != NULL)
2071
317k
  len += strlen(bas->path);
2072
350k
    res->path = (char *) xmlMallocAtomic(len);
2073
350k
    if (res->path == NULL) {
2074
0
        xmlURIErrMemory("resolving URI against base\n");
2075
0
  goto done;
2076
0
    }
2077
350k
    res->path[0] = 0;
2078
2079
    /*
2080
     * a) All but the last segment of the base URI's path component is
2081
     *    copied to the buffer.  In other words, any characters after the
2082
     *    last (right-most) slash character, if any, are excluded.
2083
     */
2084
350k
    cur = 0;
2085
350k
    out = 0;
2086
350k
    if (bas->path != NULL) {
2087
5.40M
  while (bas->path[cur] != 0) {
2088
231M
      while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2089
226M
    cur++;
2090
5.40M
      if (bas->path[cur] == 0)
2091
311k
    break;
2092
2093
5.08M
      cur++;
2094
208M
      while (out < cur) {
2095
203M
    res->path[out] = bas->path[out];
2096
203M
    out++;
2097
203M
      }
2098
5.08M
  }
2099
317k
    }
2100
350k
    res->path[out] = 0;
2101
2102
    /*
2103
     * b) The reference's path component is appended to the buffer
2104
     *    string.
2105
     */
2106
350k
    if (ref->path != NULL && ref->path[0] != 0) {
2107
350k
  indx = 0;
2108
  /*
2109
   * Ensure the path includes a '/'
2110
   */
2111
350k
  if ((out == 0) && (bas->server != NULL))
2112
8.22k
      res->path[out++] = '/';
2113
4.21M
  while (ref->path[indx] != 0) {
2114
3.86M
      res->path[out++] = ref->path[indx++];
2115
3.86M
  }
2116
350k
    }
2117
350k
    res->path[out] = 0;
2118
2119
    /*
2120
     * Steps c) to h) are really path normalization steps
2121
     */
2122
350k
    xmlNormalizeURIPath(res->path);
2123
2124
473k
step_7:
2125
2126
    /*
2127
     * 7) The resulting URI components, including any inherited from the
2128
     *    base URI, are recombined to give the absolute form of the URI
2129
     *    reference.
2130
     */
2131
473k
    val = xmlSaveUri(res);
2132
2133
3.30M
done:
2134
3.30M
    if (ref != NULL)
2135
3.16M
  xmlFreeURI(ref);
2136
3.30M
    if (bas != NULL)
2137
669k
  xmlFreeURI(bas);
2138
3.30M
    if (res != NULL)
2139
473k
  xmlFreeURI(res);
2140
3.30M
    return(val);
2141
473k
}
2142
2143
/**
2144
 * xmlBuildRelativeURI:
2145
 * @URI:  the URI reference under consideration
2146
 * @base:  the base value
2147
 *
2148
 * Expresses the URI of the reference in terms relative to the
2149
 * base.  Some examples of this operation include:
2150
 *     base = "http://site1.com/docs/book1.html"
2151
 *        URI input                        URI returned
2152
 *     docs/pic1.gif                    pic1.gif
2153
 *     docs/img/pic1.gif                img/pic1.gif
2154
 *     img/pic1.gif                     ../img/pic1.gif
2155
 *     http://site1.com/docs/pic1.gif   pic1.gif
2156
 *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2157
 *
2158
 *     base = "docs/book1.html"
2159
 *        URI input                        URI returned
2160
 *     docs/pic1.gif                    pic1.gif
2161
 *     docs/img/pic1.gif                img/pic1.gif
2162
 *     img/pic1.gif                     ../img/pic1.gif
2163
 *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2164
 *
2165
 *
2166
 * Note: if the URI reference is really weird or complicated, it may be
2167
 *       worthwhile to first convert it into a "nice" one by calling
2168
 *       xmlBuildURI (using 'base') before calling this routine,
2169
 *       since this routine (for reasonable efficiency) assumes URI has
2170
 *       already been through some validation.
2171
 *
2172
 * Returns a new URI string (to be freed by the caller) or NULL in case
2173
 * error.
2174
 */
2175
xmlChar *
2176
xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2177
355
{
2178
355
    xmlChar *val = NULL;
2179
355
    int ret;
2180
355
    int ix;
2181
355
    int nbslash = 0;
2182
355
    int len;
2183
355
    xmlURIPtr ref = NULL;
2184
355
    xmlURIPtr bas = NULL;
2185
355
    xmlChar *bptr, *uptr, *vptr;
2186
355
    int remove_path = 0;
2187
2188
355
    if ((URI == NULL) || (*URI == 0))
2189
0
  return NULL;
2190
2191
    /*
2192
     * First parse URI into a standard form
2193
     */
2194
355
    ref = xmlCreateURI ();
2195
355
    if (ref == NULL)
2196
0
  return NULL;
2197
    /* If URI not already in "relative" form */
2198
355
    if (URI[0] != '.') {
2199
5
  ret = xmlParseURIReference (ref, (const char *) URI);
2200
5
  if (ret != 0)
2201
0
      goto done;   /* Error in URI, return NULL */
2202
5
    } else
2203
350
  ref->path = (char *)xmlStrdup(URI);
2204
2205
    /*
2206
     * Next parse base into the same standard form
2207
     */
2208
355
    if ((base == NULL) || (*base == 0)) {
2209
283
  val = xmlStrdup (URI);
2210
283
  goto done;
2211
283
    }
2212
72
    bas = xmlCreateURI ();
2213
72
    if (bas == NULL)
2214
0
  goto done;
2215
72
    if (base[0] != '.') {
2216
66
  ret = xmlParseURIReference (bas, (const char *) base);
2217
66
  if (ret != 0)
2218
0
      goto done;   /* Error in base, return NULL */
2219
66
    } else
2220
6
  bas->path = (char *)xmlStrdup(base);
2221
2222
    /*
2223
     * If the scheme / server on the URI differs from the base,
2224
     * just return the URI
2225
     */
2226
72
    if ((ref->scheme != NULL) &&
2227
72
  ((bas->scheme == NULL) ||
2228
0
   (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2229
0
   (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2230
0
  val = xmlStrdup (URI);
2231
0
  goto done;
2232
0
    }
2233
72
    if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2234
0
  val = xmlStrdup(BAD_CAST "");
2235
0
  goto done;
2236
0
    }
2237
72
    if (bas->path == NULL) {
2238
0
  val = xmlStrdup((xmlChar *)ref->path);
2239
0
  goto done;
2240
0
    }
2241
72
    if (ref->path == NULL) {
2242
0
        ref->path = (char *) "/";
2243
0
  remove_path = 1;
2244
0
    }
2245
2246
    /*
2247
     * At this point (at last!) we can compare the two paths
2248
     *
2249
     * First we take care of the special case where either of the
2250
     * two path components may be missing (bug 316224)
2251
     */
2252
72
    bptr = (xmlChar *)bas->path;
2253
72
    {
2254
72
        xmlChar *rptr = (xmlChar *) ref->path;
2255
72
        int pos = 0;
2256
2257
        /*
2258
         * Next we compare the two strings and find where they first differ
2259
         */
2260
72
  if ((*rptr == '.') && (rptr[1] == '/'))
2261
0
            rptr += 2;
2262
72
  if ((*bptr == '.') && (bptr[1] == '/'))
2263
0
            bptr += 2;
2264
72
  else if ((*bptr == '/') && (*rptr != '/'))
2265
0
      bptr++;
2266
78
  while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2267
6
      pos++;
2268
2269
72
  if (bptr[pos] == rptr[pos]) {
2270
0
      val = xmlStrdup(BAD_CAST "");
2271
0
      goto done;    /* (I can't imagine why anyone would do this) */
2272
0
  }
2273
2274
  /*
2275
   * In URI, "back up" to the last '/' encountered.  This will be the
2276
   * beginning of the "unique" suffix of URI
2277
   */
2278
72
  ix = pos;
2279
78
  for (; ix > 0; ix--) {
2280
6
      if (rptr[ix - 1] == '/')
2281
0
    break;
2282
6
  }
2283
72
  uptr = (xmlChar *)&rptr[ix];
2284
2285
  /*
2286
   * In base, count the number of '/' from the differing point
2287
   */
2288
8.06k
  for (; bptr[ix] != 0; ix++) {
2289
7.99k
      if (bptr[ix] == '/')
2290
0
    nbslash++;
2291
7.99k
  }
2292
2293
  /*
2294
   * e.g: URI="foo/" base="foo/bar" -> "./"
2295
   */
2296
72
  if (nbslash == 0 && !uptr[0]) {
2297
0
      val = xmlStrdup(BAD_CAST "./");
2298
0
      goto done;
2299
0
  }
2300
2301
72
  len = xmlStrlen (uptr) + 1;
2302
72
    }
2303
2304
72
    if (nbslash == 0) {
2305
72
  if (uptr != NULL)
2306
      /* exception characters from xmlSaveUri */
2307
72
      val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2308
72
  goto done;
2309
72
    }
2310
2311
    /*
2312
     * Allocate just enough space for the returned string -
2313
     * length of the remainder of the URI, plus enough space
2314
     * for the "../" groups, plus one for the terminator
2315
     */
2316
0
    val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2317
0
    if (val == NULL) {
2318
0
        xmlURIErrMemory("building relative URI\n");
2319
0
  goto done;
2320
0
    }
2321
0
    vptr = val;
2322
    /*
2323
     * Put in as many "../" as needed
2324
     */
2325
0
    for (; nbslash>0; nbslash--) {
2326
0
  *vptr++ = '.';
2327
0
  *vptr++ = '.';
2328
0
  *vptr++ = '/';
2329
0
    }
2330
    /*
2331
     * Finish up with the end of the URI
2332
     */
2333
0
    if (uptr != NULL) {
2334
0
        if ((vptr > val) && (len > 0) &&
2335
0
      (uptr[0] == '/') && (vptr[-1] == '/')) {
2336
0
      memcpy (vptr, uptr + 1, len - 1);
2337
0
      vptr[len - 2] = 0;
2338
0
  } else {
2339
0
      memcpy (vptr, uptr, len);
2340
0
      vptr[len - 1] = 0;
2341
0
  }
2342
0
    } else {
2343
0
  vptr[len - 1] = 0;
2344
0
    }
2345
2346
    /* escape the freshly-built path */
2347
0
    vptr = val;
2348
  /* exception characters from xmlSaveUri */
2349
0
    val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2350
0
    xmlFree(vptr);
2351
2352
355
done:
2353
    /*
2354
     * Free the working variables
2355
     */
2356
355
    if (remove_path != 0)
2357
0
        ref->path = NULL;
2358
355
    if (ref != NULL)
2359
355
  xmlFreeURI (ref);
2360
355
    if (bas != NULL)
2361
72
  xmlFreeURI (bas);
2362
2363
355
    return val;
2364
0
}
2365
2366
/**
2367
 * xmlCanonicPath:
2368
 * @path:  the resource locator in a filesystem notation
2369
 *
2370
 * Constructs a canonic path from the specified path.
2371
 *
2372
 * Returns a new canonic path, or a duplicate of the path parameter if the
2373
 * construction fails. The caller is responsible for freeing the memory occupied
2374
 * by the returned string. If there is insufficient memory available, or the
2375
 * argument is NULL, the function returns NULL.
2376
 */
2377
#define IS_WINDOWS_PATH(p)          \
2378
  ((p != NULL) &&           \
2379
   (((p[0] >= 'a') && (p[0] <= 'z')) ||     \
2380
    ((p[0] >= 'A') && (p[0] <= 'Z'))) &&      \
2381
   (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2382
xmlChar *
2383
xmlCanonicPath(const xmlChar *path)
2384
3.68M
{
2385
/*
2386
 * For Windows implementations, additional work needs to be done to
2387
 * replace backslashes in pathnames with "forward slashes"
2388
 */
2389
#if defined(_WIN32)
2390
    int len = 0;
2391
    char *p = NULL;
2392
#endif
2393
3.68M
    xmlURIPtr uri;
2394
3.68M
    xmlChar *ret;
2395
3.68M
    const xmlChar *absuri;
2396
2397
3.68M
    if (path == NULL)
2398
0
  return(NULL);
2399
2400
#if defined(_WIN32)
2401
    /*
2402
     * We must not change the backslashes to slashes if the the path
2403
     * starts with \\?\
2404
     * Those paths can be up to 32k characters long.
2405
     * Was added specifically for OpenOffice, those paths can't be converted
2406
     * to URIs anyway.
2407
     */
2408
    if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2409
        (path[3] == '\\') )
2410
  return xmlStrdup((const xmlChar *) path);
2411
#endif
2412
2413
  /* sanitize filename starting with // so it can be used as URI */
2414
3.68M
    if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2415
3.49k
        path++;
2416
2417
3.68M
    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2418
3.17M
  xmlFreeURI(uri);
2419
3.17M
  return xmlStrdup(path);
2420
3.17M
    }
2421
2422
    /* Check if this is an "absolute uri" */
2423
510k
    absuri = xmlStrstr(path, BAD_CAST "://");
2424
510k
    if (absuri != NULL) {
2425
70.5k
        int l, j;
2426
70.5k
  unsigned char c;
2427
70.5k
  xmlChar *escURI;
2428
2429
        /*
2430
   * this looks like an URI where some parts have not been
2431
   * escaped leading to a parsing problem.  Check that the first
2432
   * part matches a protocol.
2433
   */
2434
70.5k
  l = absuri - path;
2435
  /* Bypass if first part (part before the '://') is > 20 chars */
2436
70.5k
  if ((l <= 0) || (l > 20))
2437
31.6k
      goto path_processing;
2438
  /* Bypass if any non-alpha characters are present in first part */
2439
144k
  for (j = 0;j < l;j++) {
2440
112k
      c = path[j];
2441
112k
      if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2442
7.62k
          goto path_processing;
2443
112k
  }
2444
2445
  /* Escape all except the characters specified in the supplied path */
2446
31.3k
        escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2447
31.3k
  if (escURI != NULL) {
2448
      /* Try parsing the escaped path */
2449
31.3k
      uri = xmlParseURI((const char *) escURI);
2450
      /* If successful, return the escaped string */
2451
31.3k
      if (uri != NULL) {
2452
22.2k
          xmlFreeURI(uri);
2453
22.2k
    return escURI;
2454
22.2k
      }
2455
9.13k
            xmlFree(escURI);
2456
9.13k
  }
2457
31.3k
    }
2458
2459
488k
path_processing:
2460
/* For Windows implementations, replace backslashes with 'forward slashes' */
2461
#if defined(_WIN32)
2462
    /*
2463
     * Create a URI structure
2464
     */
2465
    uri = xmlCreateURI();
2466
    if (uri == NULL) {    /* Guard against 'out of memory' */
2467
        return(NULL);
2468
    }
2469
2470
    len = xmlStrlen(path);
2471
    if ((len > 2) && IS_WINDOWS_PATH(path)) {
2472
        /* make the scheme 'file' */
2473
  uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2474
  /* allocate space for leading '/' + path + string terminator */
2475
  uri->path = xmlMallocAtomic(len + 2);
2476
  if (uri->path == NULL) {
2477
      xmlFreeURI(uri);  /* Guard against 'out of memory' */
2478
      return(NULL);
2479
  }
2480
  /* Put in leading '/' plus path */
2481
  uri->path[0] = '/';
2482
  p = uri->path + 1;
2483
  strncpy(p, (char *) path, len + 1);
2484
    } else {
2485
  uri->path = (char *) xmlStrdup(path);
2486
  if (uri->path == NULL) {
2487
      xmlFreeURI(uri);
2488
      return(NULL);
2489
  }
2490
  p = uri->path;
2491
    }
2492
    /* Now change all occurrences of '\' to '/' */
2493
    while (*p != '\0') {
2494
  if (*p == '\\')
2495
      *p = '/';
2496
  p++;
2497
    }
2498
2499
    if (uri->scheme == NULL) {
2500
  ret = xmlStrdup((const xmlChar *) uri->path);
2501
    } else {
2502
  ret = xmlSaveUri(uri);
2503
    }
2504
2505
    xmlFreeURI(uri);
2506
#else
2507
488k
    ret = xmlStrdup((const xmlChar *) path);
2508
488k
#endif
2509
488k
    return(ret);
2510
510k
}
2511
2512
/**
2513
 * xmlPathToURI:
2514
 * @path:  the resource locator in a filesystem notation
2515
 *
2516
 * Constructs an URI expressing the existing path
2517
 *
2518
 * Returns a new URI, or a duplicate of the path parameter if the
2519
 * construction fails. The caller is responsible for freeing the memory
2520
 * occupied by the returned string. If there is insufficient memory available,
2521
 * or the argument is NULL, the function returns NULL.
2522
 */
2523
xmlChar *
2524
xmlPathToURI(const xmlChar *path)
2525
457k
{
2526
457k
    xmlURIPtr uri;
2527
457k
    xmlURI temp;
2528
457k
    xmlChar *ret, *cal;
2529
2530
457k
    if (path == NULL)
2531
0
        return(NULL);
2532
2533
457k
    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2534
125k
  xmlFreeURI(uri);
2535
125k
  return xmlStrdup(path);
2536
125k
    }
2537
332k
    cal = xmlCanonicPath(path);
2538
332k
    if (cal == NULL)
2539
0
        return(NULL);
2540
#if defined(_WIN32)
2541
    /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2542
       If 'cal' is a valid URI already then we are done here, as continuing would make
2543
       it invalid. */
2544
    if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2545
  xmlFreeURI(uri);
2546
  return cal;
2547
    }
2548
    /* 'cal' can contain a relative path with backslashes. If that is processed
2549
       by xmlSaveURI, they will be escaped and the external entity loader machinery
2550
       will fail. So convert them to slashes. Misuse 'ret' for walking. */
2551
    ret = cal;
2552
    while (*ret != '\0') {
2553
  if (*ret == '\\')
2554
      *ret = '/';
2555
  ret++;
2556
    }
2557
#endif
2558
332k
    memset(&temp, 0, sizeof(temp));
2559
332k
    temp.path = (char *) cal;
2560
332k
    ret = xmlSaveUri(&temp);
2561
332k
    xmlFree(cal);
2562
332k
    return(ret);
2563
332k
}