Coverage Report

Created: 2026-05-16 06:38

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxml2-2.9.7/uri.c
Line
Count
Source
1
/**
2
 * uri.c: set of generic URI related routines
3
 *
4
 * Reference: RFCs 3986, 2732 and 2373
5
 *
6
 * See Copyright for the status of this software.
7
 *
8
 * daniel@veillard.com
9
 */
10
11
#define IN_LIBXML
12
#include "libxml.h"
13
14
#include <string.h>
15
16
#include <libxml/xmlmemory.h>
17
#include <libxml/uri.h>
18
#include <libxml/globals.h>
19
#include <libxml/xmlerror.h>
20
21
/**
22
 * MAX_URI_LENGTH:
23
 *
24
 * The definition of the URI regexp in the above RFC has no size limit
25
 * In practice they are usually relatively short except for the
26
 * data URI scheme as defined in RFC 2397. Even for data URI the usual
27
 * maximum size before hitting random practical limits is around 64 KB
28
 * and 4KB is usually a maximum admitted limit for proper operations.
29
 * The value below is more a security limit than anything else and
30
 * really should never be hit by 'normal' operations
31
 * Set to 1 MByte in 2012, this is only enforced on output
32
 */
33
4.26k
/* parenthesized so that the value stays intact inside larger expressions */
#define MAX_URI_LENGTH (1024 * 1024)
34
35
static void
36
xmlURIErrMemory(const char *extra)
37
0
{
38
0
    if (extra)
39
0
        __xmlRaiseError(NULL, NULL, NULL,
40
0
                        NULL, NULL, XML_FROM_URI,
41
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
42
0
                        extra, NULL, NULL, 0, 0,
43
0
                        "Memory allocation failed : %s\n", extra);
44
0
    else
45
0
        __xmlRaiseError(NULL, NULL, NULL,
46
0
                        NULL, NULL, XML_FROM_URI,
47
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
48
0
                        NULL, NULL, NULL, 0, 0,
49
0
                        "Memory allocation failed\n");
50
0
}
51
52
static void xmlCleanURI(xmlURIPtr uri);
53
54
/*
 * Character classes from RFC 2396, used by the legacy handling code.
 * All of them take a character value, except IS_UNWISE which takes a
 * pointer to the character to test.
 */

/* lowalpha = "a" .. "z" */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/* upalpha = "A" .. "Z" */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* alpha = lowalpha | upalpha */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* digit = "0" .. "9" ; any earlier definition of IS_DIGIT is replaced */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/* alphanum = alpha | digit */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||                    \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 * Takes a pointer; note that '[' and ']' are accepted here too.
 */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') ||                    \
     ((x) == ':') || ((x) == '@') || ((x) == '&') ||                    \
     ((x) == '=') || ((x) == '+') || ((x) == '$') ||                    \
     ((x) == ',') || ((x) == '[') || ((x) == ']'))

/* unreserved = alphanum | mark */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
121
122
/*
 * Move the pointer p to the next character. A '%' is taken as the start
 * of a three character escape sequence and skipped as a whole; the macro
 * itself does not look at the two characters following the '%'.
 * The argument is parenthesized so that any pointer lvalue expression
 * can be passed safely.
 */
#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)
127
128
/*
129
 * Productions from the spec.
130
 *
131
 *    authority     = server | reg_name
132
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
133
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
134
 *
135
 * path          = [ abs_path | opaque_part ]
136
 */
137
138
457k
/* xmlStrndup() on a char string; outer parentheses keep the cast atomic */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
139
140
/************************************************************************
141
 *                  *
142
 *                         RFC 3986 parser        *
143
 *                  *
144
 ************************************************************************/
145
146
14.3M
/*
 * Character classes of RFC 3986. Every macro takes a pointer to the
 * character to test.
 */

/* DIGIT = "0" .. "9" */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))

/* ALPHA = "a" .. "z" / "A" .. "Z" */
#define ISA_ALPHA(p)                                                    \
    (((*(p) >= 'a') && (*(p) <= 'z')) ||                                \
     ((*(p) >= 'A') && (*(p) <= 'Z')))

/* HEXDIG = DIGIT / "a" .. "f" / "A" .. "F" */
#define ISA_HEXDIG(p)                                                   \
    (ISA_DIGIT(p) ||                                                    \
     ((*(p) >= 'a') && (*(p) <= 'f')) ||                                \
     ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
    (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||           \
     ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||           \
     ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||           \
     ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
    (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||           \
     ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||           \
     ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
    ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) ||                                \
     ((*(p) == '-')) || ((*(p) == '.')) ||                              \
     ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following characters are only read once the previous one
 * matched.
 */
#define ISA_PCT_ENCODED(p)                                              \
    ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
    (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||     \
     ((*(p) == ':')) || ((*(p) == '@')))
195
196
/**
197
 * xmlParse3986Scheme:
198
 * @uri:  pointer to an URI structure
199
 * @str:  pointer to the string to analyze
200
 *
201
 * Parse an URI scheme
202
 *
203
 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
204
 *
205
 * Returns 0 or the error code
206
 */
207
static int
208
939k
xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
209
939k
    const char *cur;
210
211
939k
    if (str == NULL)
212
0
  return(-1);
213
214
939k
    cur = *str;
215
939k
    if (!ISA_ALPHA(cur))
216
546k
  return(2);
217
393k
    cur++;
218
1.54M
    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
219
1.15M
           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
220
393k
    if (uri != NULL) {
221
393k
  if (uri->scheme != NULL) xmlFree(uri->scheme);
222
393k
  uri->scheme = STRNDUP(*str, cur - *str);
223
393k
    }
224
393k
    *str = cur;
225
393k
    return(0);
226
939k
}
227
228
/**
229
 * xmlParse3986Fragment:
230
 * @uri:  pointer to an URI structure
231
 * @str:  pointer to the string to analyze
232
 *
233
 * Parse the query part of an URI
234
 *
235
 * fragment      = *( pchar / "/" / "?" )
236
 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237
 *       in the fragment identifier but this is used very broadly for
238
 *       xpointer scheme selection, so we are allowing it here to not break
239
 *       for example all the DocBook processing chains.
240
 *
241
 * Returns 0 or the error code
242
 */
243
static int
244
xmlParse3986Fragment(xmlURIPtr uri, const char **str)
245
51.5k
{
246
51.5k
    const char *cur;
247
248
51.5k
    if (str == NULL)
249
0
        return (-1);
250
251
51.5k
    cur = *str;
252
253
3.00M
    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
254
65.1k
           (*cur == '[') || (*cur == ']') ||
255
51.5k
           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
256
2.95M
        NEXT(cur);
257
51.5k
    if (uri != NULL) {
258
51.5k
        if (uri->fragment != NULL)
259
0
            xmlFree(uri->fragment);
260
51.5k
  if (uri->cleanup & 2)
261
0
      uri->fragment = STRNDUP(*str, cur - *str);
262
51.5k
  else
263
51.5k
      uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
264
51.5k
    }
265
51.5k
    *str = cur;
266
51.5k
    return (0);
267
51.5k
}
268
269
/**
270
 * xmlParse3986Query:
271
 * @uri:  pointer to an URI structure
272
 * @str:  pointer to the string to analyze
273
 *
274
 * Parse the query part of an URI
275
 *
276
 * query = *uric
277
 *
278
 * Returns 0 or the error code
279
 */
280
static int
281
xmlParse3986Query(xmlURIPtr uri, const char **str)
282
63.9k
{
283
63.9k
    const char *cur;
284
285
63.9k
    if (str == NULL)
286
0
        return (-1);
287
288
63.9k
    cur = *str;
289
290
1.48M
    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
291
63.9k
           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
292
1.42M
        NEXT(cur);
293
63.9k
    if (uri != NULL) {
294
63.9k
        if (uri->query != NULL)
295
0
            xmlFree(uri->query);
296
63.9k
  if (uri->cleanup & 2)
297
0
      uri->query = STRNDUP(*str, cur - *str);
298
63.9k
  else
299
63.9k
      uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
300
301
  /* Save the raw bytes of the query as well.
302
   * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
303
   */
304
63.9k
  if (uri->query_raw != NULL)
305
0
      xmlFree (uri->query_raw);
306
63.9k
  uri->query_raw = STRNDUP (*str, cur - *str);
307
63.9k
    }
308
63.9k
    *str = cur;
309
63.9k
    return (0);
310
63.9k
}
311
312
/**
313
 * xmlParse3986Port:
314
 * @uri:  pointer to an URI structure
315
 * @str:  the string to analyze
316
 *
317
 * Parse a port part and fills in the appropriate fields
318
 * of the @uri structure
319
 *
320
 * port          = *DIGIT
321
 *
322
 * Returns 0 or the error code
323
 */
324
static int
325
xmlParse3986Port(xmlURIPtr uri, const char **str)
326
30.6k
{
327
30.6k
    const char *cur = *str;
328
30.6k
    unsigned port = 0; /* unsigned for defined overflow behavior */
329
330
30.6k
    if (ISA_DIGIT(cur)) {
331
42.7k
  while (ISA_DIGIT(cur)) {
332
34.2k
      port = port * 10 + (*cur - '0');
333
334
34.2k
      cur++;
335
34.2k
  }
336
8.49k
  if (uri != NULL)
337
8.49k
      uri->port = port & INT_MAX; /* port value modulo INT_MAX+1 */
338
8.49k
  *str = cur;
339
8.49k
  return(0);
340
8.49k
    }
341
22.1k
    return(1);
342
30.6k
}
343
344
/**
345
 * xmlParse3986Userinfo:
346
 * @uri:  pointer to an URI structure
347
 * @str:  the string to analyze
348
 *
349
 * Parse an user informations part and fills in the appropriate fields
350
 * of the @uri structure
351
 *
352
 * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
353
 *
354
 * Returns 0 or the error code
355
 */
356
static int
357
xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
358
235k
{
359
235k
    const char *cur;
360
361
235k
    cur = *str;
362
3.46M
    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
363
497k
           ISA_SUB_DELIM(cur) || (*cur == ':'))
364
3.22M
  NEXT(cur);
365
235k
    if (*cur == '@') {
366
15.3k
  if (uri != NULL) {
367
15.3k
      if (uri->user != NULL) xmlFree(uri->user);
368
15.3k
      if (uri->cleanup & 2)
369
0
    uri->user = STRNDUP(*str, cur - *str);
370
15.3k
      else
371
15.3k
    uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
372
15.3k
  }
373
15.3k
  *str = cur;
374
15.3k
  return(0);
375
15.3k
    }
376
220k
    return(1);
377
235k
}
378
379
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are looked at; a number with
 * a leading zero or a value above 255 is not a dec-octet.
 * *@str is only modified on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int value = 0;
    int len = 0;

    /*
     * Collect up to three digits. A character is only read after the
     * previous one was a digit, so the scan never goes past the end
     * of the string.
     */
    while ((len < 3) && (cur[len] >= '0') && (cur[len] <= '9')) {
        value = value * 10 + (cur[len] - '0');
        len++;
    }

    if (len == 0)
        return(1);
    /* "0" alone is fine, but the grammar has no "00", "01" or "012" */
    if ((len > 1) && (cur[0] == '0'))
        return(1);
    /* three digits may not exceed 255 */
    if (value > 255)
        return(1);

    *str = cur + len;
    return(0);
}
416
/**
417
 * xmlParse3986Host:
418
 * @uri:  pointer to an URI structure
419
 * @str:  the string to analyze
420
 *
421
 * Parse an host part and fills in the appropriate fields
422
 * of the @uri structure
423
 *
424
 * host          = IP-literal / IPv4address / reg-name
425
 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
426
 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
427
 * reg-name      = *( unreserved / pct-encoded / sub-delims )
428
 *
429
 * Returns 0 or the error code
430
 */
431
static int
432
xmlParse3986Host(xmlURIPtr uri, const char **str)
433
235k
{
434
235k
    const char *cur = *str;
435
235k
    const char *host;
436
437
235k
    host = cur;
438
    /*
439
     * IPv6 and future adressing scheme are enclosed between brackets
440
     */
441
235k
    if (*cur == '[') {
442
3.99k
        cur++;
443
603k
  while ((*cur != ']') && (*cur != 0))
444
599k
      cur++;
445
3.99k
  if (*cur != ']')
446
2.34k
      return(1);
447
1.64k
  cur++;
448
1.64k
  goto found;
449
3.99k
    }
450
    /*
451
     * try to parse an IPv4
452
     */
453
231k
    if (ISA_DIGIT(cur)) {
454
47.3k
        if (xmlParse3986DecOctet(&cur) != 0)
455
4.96k
      goto not_ipv4;
456
42.4k
  if (*cur != '.')
457
20.5k
      goto not_ipv4;
458
21.8k
  cur++;
459
21.8k
        if (xmlParse3986DecOctet(&cur) != 0)
460
7.43k
      goto not_ipv4;
461
14.4k
  if (*cur != '.')
462
12.9k
      goto not_ipv4;
463
1.41k
        if (xmlParse3986DecOctet(&cur) != 0)
464
1.41k
      goto not_ipv4;
465
0
  if (*cur != '.')
466
0
      goto not_ipv4;
467
0
        if (xmlParse3986DecOctet(&cur) != 0)
468
0
      goto not_ipv4;
469
0
  goto found;
470
47.3k
not_ipv4:
471
47.3k
        cur = *str;
472
47.3k
    }
473
    /*
474
     * then this should be a hostname which can be empty
475
     */
476
3.27M
    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
477
3.03M
        NEXT(cur);
478
233k
found:
479
233k
    if (uri != NULL) {
480
233k
  if (uri->authority != NULL) xmlFree(uri->authority);
481
233k
  uri->authority = NULL;
482
233k
  if (uri->server != NULL) xmlFree(uri->server);
483
233k
  if (cur != host) {
484
212k
      if (uri->cleanup & 2)
485
0
    uri->server = STRNDUP(host, cur - host);
486
212k
      else
487
212k
    uri->server = xmlURIUnescapeString(host, cur - host, NULL);
488
212k
  } else
489
20.2k
      uri->server = NULL;
490
233k
    }
491
233k
    *str = cur;
492
233k
    return(0);
493
231k
}
494
495
/**
496
 * xmlParse3986Authority:
497
 * @uri:  pointer to an URI structure
498
 * @str:  the string to analyze
499
 *
500
 * Parse an authority part and fills in the appropriate fields
501
 * of the @uri structure
502
 *
503
 * authority     = [ userinfo "@" ] host [ ":" port ]
504
 *
505
 * Returns 0 or the error code
506
 */
507
static int
508
xmlParse3986Authority(xmlURIPtr uri, const char **str)
509
235k
{
510
235k
    const char *cur;
511
235k
    int ret;
512
513
235k
    cur = *str;
514
    /*
515
     * try to parse an userinfo and check for the trailing @
516
     */
517
235k
    ret = xmlParse3986Userinfo(uri, &cur);
518
235k
    if ((ret != 0) || (*cur != '@'))
519
220k
        cur = *str;
520
15.3k
    else
521
15.3k
        cur++;
522
235k
    ret = xmlParse3986Host(uri, &cur);
523
235k
    if (ret != 0) return(ret);
524
233k
    if (*cur == ':') {
525
30.6k
        cur++;
526
30.6k
        ret = xmlParse3986Port(uri, &cur);
527
30.6k
  if (ret != 0) return(ret);
528
30.6k
    }
529
210k
    *str = cur;
530
210k
    return(0);
531
233k
}
532
533
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip a path segment, stopping before @forbid if it is met
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success, 1 if the string does not start with a pchar
 *         and @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *pos = *str;

    if (!ISA_PCHAR(pos))
        return(empty ? 0 : 1);

    while (ISA_PCHAR(pos) && (*pos != forbid))
        NEXT(pos);

    *str = pos;
    return (0);
}
565
566
/**
567
 * xmlParse3986PathAbEmpty:
568
 * @uri:  pointer to an URI structure
569
 * @str:  the string to analyze
570
 *
571
 * Parse an path absolute or empty and fills in the appropriate fields
572
 * of the @uri structure
573
 *
574
 * path-abempty  = *( "/" segment )
575
 *
576
 * Returns 0 or the error code
577
 */
578
static int
579
xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
580
210k
{
581
210k
    const char *cur;
582
210k
    int ret;
583
584
210k
    cur = *str;
585
586
685k
    while (*cur == '/') {
587
474k
        cur++;
588
474k
  ret = xmlParse3986Segment(&cur, 0, 1);
589
474k
  if (ret != 0) return(ret);
590
474k
    }
591
210k
    if (uri != NULL) {
592
210k
  if (uri->path != NULL) xmlFree(uri->path);
593
210k
        if (*str != cur) {
594
104k
            if (uri->cleanup & 2)
595
0
                uri->path = STRNDUP(*str, cur - *str);
596
104k
            else
597
104k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
598
106k
        } else {
599
106k
            uri->path = NULL;
600
106k
        }
601
210k
    }
602
210k
    *str = cur;
603
210k
    return (0);
604
210k
}
605
606
/**
607
 * xmlParse3986PathAbsolute:
608
 * @uri:  pointer to an URI structure
609
 * @str:  the string to analyze
610
 *
611
 * Parse an path absolute and fills in the appropriate fields
612
 * of the @uri structure
613
 *
614
 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
615
 *
616
 * Returns 0 or the error code
617
 */
618
static int
619
xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
620
64.6k
{
621
64.6k
    const char *cur;
622
64.6k
    int ret;
623
624
64.6k
    cur = *str;
625
626
64.6k
    if (*cur != '/')
627
0
        return(1);
628
64.6k
    cur++;
629
64.6k
    ret = xmlParse3986Segment(&cur, 0, 0);
630
64.6k
    if (ret == 0) {
631
62.8k
  while (*cur == '/') {
632
27.1k
      cur++;
633
27.1k
      ret = xmlParse3986Segment(&cur, 0, 1);
634
27.1k
      if (ret != 0) return(ret);
635
27.1k
  }
636
35.7k
    }
637
64.6k
    if (uri != NULL) {
638
64.6k
  if (uri->path != NULL) xmlFree(uri->path);
639
64.6k
        if (cur != *str) {
640
64.6k
            if (uri->cleanup & 2)
641
0
                uri->path = STRNDUP(*str, cur - *str);
642
64.6k
            else
643
64.6k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
644
64.6k
        } else {
645
0
            uri->path = NULL;
646
0
        }
647
64.6k
    }
648
64.6k
    *str = cur;
649
64.6k
    return (0);
650
64.6k
}
651
652
/**
653
 * xmlParse3986PathRootless:
654
 * @uri:  pointer to an URI structure
655
 * @str:  the string to analyze
656
 *
657
 * Parse an path without root and fills in the appropriate fields
658
 * of the @uri structure
659
 *
660
 * path-rootless = segment-nz *( "/" segment )
661
 *
662
 * Returns 0 or the error code
663
 */
664
static int
665
xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
666
96.9k
{
667
96.9k
    const char *cur;
668
96.9k
    int ret;
669
670
96.9k
    cur = *str;
671
672
96.9k
    ret = xmlParse3986Segment(&cur, 0, 0);
673
96.9k
    if (ret != 0) return(ret);
674
143k
    while (*cur == '/') {
675
46.2k
        cur++;
676
46.2k
  ret = xmlParse3986Segment(&cur, 0, 1);
677
46.2k
  if (ret != 0) return(ret);
678
46.2k
    }
679
96.9k
    if (uri != NULL) {
680
96.9k
  if (uri->path != NULL) xmlFree(uri->path);
681
96.9k
        if (cur != *str) {
682
96.9k
            if (uri->cleanup & 2)
683
0
                uri->path = STRNDUP(*str, cur - *str);
684
96.9k
            else
685
96.9k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
686
96.9k
        } else {
687
0
            uri->path = NULL;
688
0
        }
689
96.9k
    }
690
96.9k
    *str = cur;
691
96.9k
    return (0);
692
96.9k
}
693
694
/**
695
 * xmlParse3986PathNoScheme:
696
 * @uri:  pointer to an URI structure
697
 * @str:  the string to analyze
698
 *
699
 * Parse an path which is not a scheme and fills in the appropriate fields
700
 * of the @uri structure
701
 *
702
 * path-noscheme = segment-nz-nc *( "/" segment )
703
 *
704
 * Returns 0 or the error code
705
 */
706
static int
707
xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
708
366k
{
709
366k
    const char *cur;
710
366k
    int ret;
711
712
366k
    cur = *str;
713
714
366k
    ret = xmlParse3986Segment(&cur, ':', 0);
715
366k
    if (ret != 0) return(ret);
716
640k
    while (*cur == '/') {
717
274k
        cur++;
718
274k
  ret = xmlParse3986Segment(&cur, 0, 1);
719
274k
  if (ret != 0) return(ret);
720
274k
    }
721
366k
    if (uri != NULL) {
722
366k
  if (uri->path != NULL) xmlFree(uri->path);
723
366k
        if (cur != *str) {
724
360k
            if (uri->cleanup & 2)
725
0
                uri->path = STRNDUP(*str, cur - *str);
726
360k
            else
727
360k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
728
360k
        } else {
729
5.77k
            uri->path = NULL;
730
5.77k
        }
731
366k
    }
732
366k
    *str = cur;
733
366k
    return (0);
734
366k
}
735
736
/**
737
 * xmlParse3986HierPart:
738
 * @uri:  pointer to an URI structure
739
 * @str:  the string to analyze
740
 *
741
 * Parse an hierarchical part and fills in the appropriate fields
742
 * of the @uri structure
743
 *
744
 * hier-part     = "//" authority path-abempty
745
 *                / path-absolute
746
 *                / path-rootless
747
 *                / path-empty
748
 *
749
 * Returns 0 or the error code
750
 */
751
static int
752
xmlParse3986HierPart(xmlURIPtr uri, const char **str)
753
301k
{
754
301k
    const char *cur;
755
301k
    int ret;
756
757
301k
    cur = *str;
758
759
301k
    if ((*cur == '/') && (*(cur + 1) == '/')) {
760
149k
        cur += 2;
761
149k
  ret = xmlParse3986Authority(uri, &cur);
762
149k
  if (ret != 0) return(ret);
763
135k
  if (uri->server == NULL)
764
3.52k
      uri->port = -1;
765
135k
  ret = xmlParse3986PathAbEmpty(uri, &cur);
766
135k
  if (ret != 0) return(ret);
767
135k
  *str = cur;
768
135k
  return(0);
769
151k
    } else if (*cur == '/') {
770
12.8k
        ret = xmlParse3986PathAbsolute(uri, &cur);
771
12.8k
  if (ret != 0) return(ret);
772
138k
    } else if (ISA_PCHAR(cur)) {
773
96.9k
        ret = xmlParse3986PathRootless(uri, &cur);
774
96.9k
  if (ret != 0) return(ret);
775
96.9k
    } else {
776
  /* path-empty is effectively empty */
777
41.9k
  if (uri != NULL) {
778
41.9k
      if (uri->path != NULL) xmlFree(uri->path);
779
41.9k
      uri->path = NULL;
780
41.9k
  }
781
41.9k
    }
782
151k
    *str = cur;
783
151k
    return (0);
784
301k
}
785
786
/**
787
 * xmlParse3986RelativeRef:
788
 * @uri:  pointer to an URI structure
789
 * @str:  the string to analyze
790
 *
791
 * Parse an URI string and fills in the appropriate fields
792
 * of the @uri structure
793
 *
794
 * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
795
 * relative-part = "//" authority path-abempty
796
 *               / path-absolute
797
 *               / path-noscheme
798
 *               / path-empty
799
 *
800
 * Returns 0 or the error code
801
 */
802
static int
803
769k
xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
804
769k
    int ret;
805
806
769k
    if ((*str == '/') && (*(str + 1) == '/')) {
807
86.1k
        str += 2;
808
86.1k
  ret = xmlParse3986Authority(uri, &str);
809
86.1k
  if (ret != 0) return(ret);
810
75.3k
  ret = xmlParse3986PathAbEmpty(uri, &str);
811
75.3k
  if (ret != 0) return(ret);
812
683k
    } else if (*str == '/') {
813
51.8k
  ret = xmlParse3986PathAbsolute(uri, &str);
814
51.8k
  if (ret != 0) return(ret);
815
631k
    } else if (ISA_PCHAR(str)) {
816
366k
        ret = xmlParse3986PathNoScheme(uri, &str);
817
366k
  if (ret != 0) return(ret);
818
366k
    } else {
819
  /* path-empty is effectively empty */
820
265k
  if (uri != NULL) {
821
265k
      if (uri->path != NULL) xmlFree(uri->path);
822
265k
      uri->path = NULL;
823
265k
  }
824
265k
    }
825
826
758k
    if (*str == '?') {
827
50.3k
  str++;
828
50.3k
  ret = xmlParse3986Query(uri, &str);
829
50.3k
  if (ret != 0) return(ret);
830
50.3k
    }
831
758k
    if (*str == '#') {
832
39.4k
  str++;
833
39.4k
  ret = xmlParse3986Fragment(uri, &str);
834
39.4k
  if (ret != 0) return(ret);
835
39.4k
    }
836
758k
    if (*str != 0) {
837
403k
  xmlCleanURI(uri);
838
403k
  return(1);
839
403k
    }
840
355k
    return(0);
841
758k
}
842
843
844
/**
845
 * xmlParse3986URI:
846
 * @uri:  pointer to an URI structure
847
 * @str:  the string to analyze
848
 *
849
 * Parse an URI string and fills in the appropriate fields
850
 * of the @uri structure
851
 *
852
 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
853
 *
854
 * Returns 0 or the error code
855
 */
856
static int
857
939k
xmlParse3986URI(xmlURIPtr uri, const char *str) {
858
939k
    int ret;
859
860
939k
    ret = xmlParse3986Scheme(uri, &str);
861
939k
    if (ret != 0) return(ret);
862
393k
    if (*str != ':') {
863
92.2k
  return(1);
864
92.2k
    }
865
301k
    str++;
866
301k
    ret = xmlParse3986HierPart(uri, &str);
867
301k
    if (ret != 0) return(ret);
868
287k
    if (*str == '?') {
869
13.6k
  str++;
870
13.6k
  ret = xmlParse3986Query(uri, &str);
871
13.6k
  if (ret != 0) return(ret);
872
13.6k
    }
873
287k
    if (*str == '#') {
874
12.1k
  str++;
875
12.1k
  ret = xmlParse3986Fragment(uri, &str);
876
12.1k
  if (ret != 0) return(ret);
877
12.1k
    }
878
287k
    if (*str != 0) {
879
117k
  xmlCleanURI(uri);
880
117k
  return(1);
881
117k
    }
882
170k
    return(0);
883
287k
}
884
885
/**
886
 * xmlParse3986URIReference:
887
 * @uri:  pointer to an URI structure
888
 * @str:  the string to analyze
889
 *
890
 * Parse an URI reference string and fills in the appropriate fields
891
 * of the @uri structure
892
 *
893
 * URI-reference = URI / relative-ref
894
 *
895
 * Returns 0 or the error code
896
 */
897
static int
898
939k
xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
899
939k
    int ret;
900
901
939k
    if (str == NULL)
902
0
  return(-1);
903
939k
    xmlCleanURI(uri);
904
905
    /*
906
     * Try first to parse absolute refs, then fallback to relative if
907
     * it fails.
908
     */
909
939k
    ret = xmlParse3986URI(uri, str);
910
939k
    if (ret != 0) {
911
769k
  xmlCleanURI(uri);
912
769k
        ret = xmlParse3986RelativeRef(uri, str);
913
769k
  if (ret != 0) {
914
414k
      xmlCleanURI(uri);
915
414k
      return(ret);
916
414k
  }
917
769k
    }
918
525k
    return(0);
919
939k
}
920
921
/**
922
 * xmlParseURI:
923
 * @str:  the URI string to analyze
924
 *
925
 * Parse an URI based on RFC 3986
926
 *
927
 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
928
 *
929
 * Returns a newly built xmlURIPtr or NULL in case of error
930
 */
931
xmlURIPtr
932
926k
xmlParseURI(const char *str) {
933
926k
    xmlURIPtr uri;
934
926k
    int ret;
935
936
926k
    if (str == NULL)
937
0
  return(NULL);
938
926k
    uri = xmlCreateURI();
939
926k
    if (uri != NULL) {
940
926k
  ret = xmlParse3986URIReference(uri, str);
941
926k
        if (ret) {
942
412k
      xmlFreeURI(uri);
943
412k
      return(NULL);
944
412k
  }
945
926k
    }
946
514k
    return(uri);
947
926k
}
948
949
/**
950
 * xmlParseURIReference:
951
 * @uri:  pointer to an URI structure
952
 * @str:  the string to analyze
953
 *
954
 * Parse an URI reference string based on RFC 3986 and fills in the
955
 * appropriate fields of the @uri structure
956
 *
957
 * URI-reference = URI / relative-ref
958
 *
959
 * Returns 0 or the error code
960
 */
961
int
962
12.9k
xmlParseURIReference(xmlURIPtr uri, const char *str) {
963
12.9k
    return(xmlParse3986URIReference(uri, str));
964
12.9k
}
965
966
/**
967
 * xmlParseURIRaw:
968
 * @str:  the URI string to analyze
969
 * @raw:  if 1 unescaping of URI pieces are disabled
970
 *
971
 * Parse an URI but allows to keep intact the original fragments.
972
 *
973
 * URI-reference = URI / relative-ref
974
 *
975
 * Returns a newly built xmlURIPtr or NULL in case of error
976
 */
977
xmlURIPtr
978
0
xmlParseURIRaw(const char *str, int raw) {
979
0
    xmlURIPtr uri;
980
0
    int ret;
981
982
0
    if (str == NULL)
983
0
  return(NULL);
984
0
    uri = xmlCreateURI();
985
0
    if (uri != NULL) {
986
0
        if (raw) {
987
0
      uri->cleanup |= 2;
988
0
  }
989
0
  ret = xmlParseURIReference(uri, str);
990
0
        if (ret) {
991
0
      xmlFreeURI(uri);
992
0
      return(NULL);
993
0
  }
994
0
    }
995
0
    return(uri);
996
0
}
997
998
/************************************************************************
999
 *                  *
1000
 *      Generic URI structure functions     *
1001
 *                  *
1002
 ************************************************************************/
1003
1004
/**
1005
 * xmlCreateURI:
1006
 *
1007
 * Simply creates an empty xmlURI
1008
 *
1009
 * Returns the new structure or NULL in case of error
1010
 */
1011
xmlURIPtr
1012
944k
xmlCreateURI(void) {
1013
944k
    xmlURIPtr ret;
1014
1015
944k
    ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1016
944k
    if (ret == NULL) {
1017
0
        xmlURIErrMemory("creating URI structure\n");
1018
0
  return(NULL);
1019
0
    }
1020
944k
    memset(ret, 0, sizeof(xmlURI));
1021
944k
    return(ret);
1022
944k
}
1023
1024
/**
1025
 * xmlSaveUriRealloc:
1026
 *
1027
 * Function to handle properly a reallocation when saving an URI
1028
 * Also imposes some limit on the length of an URI string output
1029
 */
1030
static xmlChar *
1031
4.26k
xmlSaveUriRealloc(xmlChar *ret, int *max) {
1032
4.26k
    xmlChar *temp;
1033
4.26k
    int tmp;
1034
1035
4.26k
    if (*max > MAX_URI_LENGTH) {
1036
0
        xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1037
0
        return(NULL);
1038
0
    }
1039
4.26k
    tmp = *max * 2;
1040
4.26k
    temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1041
4.26k
    if (temp == NULL) {
1042
0
        xmlURIErrMemory("saving URI\n");
1043
0
        return(NULL);
1044
0
    }
1045
4.26k
    *max = tmp;
1046
4.26k
    return(temp);
1047
4.26k
}
1048
1049
/**
1050
 * xmlSaveUri:
1051
 * @uri:  pointer to an xmlURI
1052
 *
1053
 * Save the URI as an escaped string
1054
 *
1055
 * Returns a new string (to be deallocated by caller)
1056
 */
1057
xmlChar *
1058
6.93k
xmlSaveUri(xmlURIPtr uri) {
1059
6.93k
    xmlChar *ret = NULL;
1060
6.93k
    xmlChar *temp;
1061
6.93k
    const char *p;
1062
6.93k
    int len;
1063
6.93k
    int max;
1064
1065
6.93k
    if (uri == NULL) return(NULL);
1066
1067
1068
6.93k
    max = 80;
1069
6.93k
    ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1070
6.93k
    if (ret == NULL) {
1071
0
        xmlURIErrMemory("saving URI\n");
1072
0
  return(NULL);
1073
0
    }
1074
6.93k
    len = 0;
1075
1076
6.93k
    if (uri->scheme != NULL) {
1077
0
  p = uri->scheme;
1078
0
  while (*p != 0) {
1079
0
      if (len >= max) {
1080
0
                temp = xmlSaveUriRealloc(ret, &max);
1081
0
                if (temp == NULL) goto mem_error;
1082
0
    ret = temp;
1083
0
      }
1084
0
      ret[len++] = *p++;
1085
0
  }
1086
0
  if (len >= max) {
1087
0
            temp = xmlSaveUriRealloc(ret, &max);
1088
0
            if (temp == NULL) goto mem_error;
1089
0
            ret = temp;
1090
0
  }
1091
0
  ret[len++] = ':';
1092
0
    }
1093
6.93k
    if (uri->opaque != NULL) {
1094
0
  p = uri->opaque;
1095
0
  while (*p != 0) {
1096
0
      if (len + 3 >= max) {
1097
0
                temp = xmlSaveUriRealloc(ret, &max);
1098
0
                if (temp == NULL) goto mem_error;
1099
0
                ret = temp;
1100
0
      }
1101
0
      if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1102
0
    ret[len++] = *p++;
1103
0
      else {
1104
0
    int val = *(unsigned char *)p++;
1105
0
    int hi = val / 0x10, lo = val % 0x10;
1106
0
    ret[len++] = '%';
1107
0
    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1108
0
    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1109
0
      }
1110
0
  }
1111
6.93k
    } else {
1112
6.93k
  if ((uri->server != NULL) || (uri->port == -1)) {
1113
2.49k
      if (len + 3 >= max) {
1114
0
                temp = xmlSaveUriRealloc(ret, &max);
1115
0
                if (temp == NULL) goto mem_error;
1116
0
                ret = temp;
1117
0
      }
1118
2.49k
      ret[len++] = '/';
1119
2.49k
      ret[len++] = '/';
1120
2.49k
      if (uri->user != NULL) {
1121
1.55k
    p = uri->user;
1122
1.28M
    while (*p != 0) {
1123
1.28M
        if (len + 3 >= max) {
1124
1.70k
                        temp = xmlSaveUriRealloc(ret, &max);
1125
1.70k
                        if (temp == NULL) goto mem_error;
1126
1.70k
                        ret = temp;
1127
1.70k
        }
1128
1.28M
        if ((IS_UNRESERVED(*(p))) ||
1129
883k
      ((*(p) == ';')) || ((*(p) == ':')) ||
1130
861k
      ((*(p) == '&')) || ((*(p) == '=')) ||
1131
755k
      ((*(p) == '+')) || ((*(p) == '$')) ||
1132
747k
      ((*(p) == ',')))
1133
563k
      ret[len++] = *p++;
1134
717k
        else {
1135
717k
      int val = *(unsigned char *)p++;
1136
717k
      int hi = val / 0x10, lo = val % 0x10;
1137
717k
      ret[len++] = '%';
1138
717k
      ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1139
717k
      ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1140
717k
        }
1141
1.28M
    }
1142
1.55k
    if (len + 3 >= max) {
1143
19
                    temp = xmlSaveUriRealloc(ret, &max);
1144
19
                    if (temp == NULL) goto mem_error;
1145
19
                    ret = temp;
1146
19
    }
1147
1.55k
    ret[len++] = '@';
1148
1.55k
      }
1149
2.49k
      if (uri->server != NULL) {
1150
2.49k
    p = uri->server;
1151
401k
    while (*p != 0) {
1152
399k
        if (len >= max) {
1153
370
      temp = xmlSaveUriRealloc(ret, &max);
1154
370
      if (temp == NULL) goto mem_error;
1155
370
      ret = temp;
1156
370
        }
1157
399k
        ret[len++] = *p++;
1158
399k
    }
1159
2.49k
    if (uri->port > 0) {
1160
140
        if (len + 10 >= max) {
1161
10
      temp = xmlSaveUriRealloc(ret, &max);
1162
10
      if (temp == NULL) goto mem_error;
1163
10
      ret = temp;
1164
10
        }
1165
140
        len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1166
140
    }
1167
2.49k
      }
1168
4.44k
  } else if (uri->authority != NULL) {
1169
0
      if (len + 3 >= max) {
1170
0
                temp = xmlSaveUriRealloc(ret, &max);
1171
0
                if (temp == NULL) goto mem_error;
1172
0
                ret = temp;
1173
0
      }
1174
0
      ret[len++] = '/';
1175
0
      ret[len++] = '/';
1176
0
      p = uri->authority;
1177
0
      while (*p != 0) {
1178
0
    if (len + 3 >= max) {
1179
0
                    temp = xmlSaveUriRealloc(ret, &max);
1180
0
                    if (temp == NULL) goto mem_error;
1181
0
                    ret = temp;
1182
0
    }
1183
0
    if ((IS_UNRESERVED(*(p))) ||
1184
0
                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1185
0
                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1186
0
                    ((*(p) == '=')) || ((*(p) == '+')))
1187
0
        ret[len++] = *p++;
1188
0
    else {
1189
0
        int val = *(unsigned char *)p++;
1190
0
        int hi = val / 0x10, lo = val % 0x10;
1191
0
        ret[len++] = '%';
1192
0
        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1193
0
        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1194
0
    }
1195
0
      }
1196
4.44k
  } else if (uri->scheme != NULL) {
1197
0
      if (len + 3 >= max) {
1198
0
                temp = xmlSaveUriRealloc(ret, &max);
1199
0
                if (temp == NULL) goto mem_error;
1200
0
                ret = temp;
1201
0
      }
1202
0
  }
1203
6.93k
  if (uri->path != NULL) {
1204
3.96k
      p = uri->path;
1205
      /*
1206
       * the colon in file:///d: should not be escaped or
1207
       * Windows accesses fail later.
1208
       */
1209
3.96k
      if ((uri->scheme != NULL) &&
1210
0
    (p[0] == '/') &&
1211
0
    (((p[1] >= 'a') && (p[1] <= 'z')) ||
1212
0
     ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1213
0
    (p[2] == ':') &&
1214
0
          (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1215
0
    if (len + 3 >= max) {
1216
0
                    temp = xmlSaveUriRealloc(ret, &max);
1217
0
                    if (temp == NULL) goto mem_error;
1218
0
                    ret = temp;
1219
0
    }
1220
0
    ret[len++] = *p++;
1221
0
    ret[len++] = *p++;
1222
0
    ret[len++] = *p++;
1223
0
      }
1224
448k
      while (*p != 0) {
1225
444k
    if (len + 3 >= max) {
1226
800
                    temp = xmlSaveUriRealloc(ret, &max);
1227
800
                    if (temp == NULL) goto mem_error;
1228
800
                    ret = temp;
1229
800
    }
1230
444k
    if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1231
59.4k
                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1232
37.8k
              ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1233
17.7k
              ((*(p) == ',')))
1234
431k
        ret[len++] = *p++;
1235
12.8k
    else {
1236
12.8k
        int val = *(unsigned char *)p++;
1237
12.8k
        int hi = val / 0x10, lo = val % 0x10;
1238
12.8k
        ret[len++] = '%';
1239
12.8k
        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1240
12.8k
        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1241
12.8k
    }
1242
444k
      }
1243
3.96k
  }
1244
6.93k
  if (uri->query_raw != NULL) {
1245
1.06k
      if (len + 1 >= max) {
1246
34
                temp = xmlSaveUriRealloc(ret, &max);
1247
34
                if (temp == NULL) goto mem_error;
1248
34
                ret = temp;
1249
34
      }
1250
1.06k
      ret[len++] = '?';
1251
1.06k
      p = uri->query_raw;
1252
366k
      while (*p != 0) {
1253
365k
    if (len + 1 >= max) {
1254
435
                    temp = xmlSaveUriRealloc(ret, &max);
1255
435
                    if (temp == NULL) goto mem_error;
1256
435
                    ret = temp;
1257
435
    }
1258
365k
    ret[len++] = *p++;
1259
365k
      }
1260
5.87k
  } else if (uri->query != NULL) {
1261
0
      if (len + 3 >= max) {
1262
0
                temp = xmlSaveUriRealloc(ret, &max);
1263
0
                if (temp == NULL) goto mem_error;
1264
0
                ret = temp;
1265
0
      }
1266
0
      ret[len++] = '?';
1267
0
      p = uri->query;
1268
0
      while (*p != 0) {
1269
0
    if (len + 3 >= max) {
1270
0
                    temp = xmlSaveUriRealloc(ret, &max);
1271
0
                    if (temp == NULL) goto mem_error;
1272
0
                    ret = temp;
1273
0
    }
1274
0
    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1275
0
        ret[len++] = *p++;
1276
0
    else {
1277
0
        int val = *(unsigned char *)p++;
1278
0
        int hi = val / 0x10, lo = val % 0x10;
1279
0
        ret[len++] = '%';
1280
0
        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1281
0
        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1282
0
    }
1283
0
      }
1284
0
  }
1285
6.93k
    }
1286
6.93k
    if (uri->fragment != NULL) {
1287
2.16k
  if (len + 3 >= max) {
1288
35
            temp = xmlSaveUriRealloc(ret, &max);
1289
35
            if (temp == NULL) goto mem_error;
1290
35
            ret = temp;
1291
35
  }
1292
2.16k
  ret[len++] = '#';
1293
2.16k
  p = uri->fragment;
1294
787k
  while (*p != 0) {
1295
785k
      if (len + 3 >= max) {
1296
832
                temp = xmlSaveUriRealloc(ret, &max);
1297
832
                if (temp == NULL) goto mem_error;
1298
832
                ret = temp;
1299
832
      }
1300
785k
      if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1301
780k
    ret[len++] = *p++;
1302
4.64k
      else {
1303
4.64k
    int val = *(unsigned char *)p++;
1304
4.64k
    int hi = val / 0x10, lo = val % 0x10;
1305
4.64k
    ret[len++] = '%';
1306
4.64k
    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1307
4.64k
    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1308
4.64k
      }
1309
785k
  }
1310
2.16k
    }
1311
6.93k
    if (len >= max) {
1312
18
        temp = xmlSaveUriRealloc(ret, &max);
1313
18
        if (temp == NULL) goto mem_error;
1314
18
        ret = temp;
1315
18
    }
1316
6.93k
    ret[len] = 0;
1317
6.93k
    return(ret);
1318
1319
0
mem_error:
1320
0
    xmlFree(ret);
1321
0
    return(NULL);
1322
6.93k
}
1323
1324
/**
1325
 * xmlPrintURI:
1326
 * @stream:  a FILE* for the output
1327
 * @uri:  pointer to an xmlURI
1328
 *
1329
 * Prints the URI in the stream @stream.
1330
 */
1331
void
1332
0
xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1333
0
    xmlChar *out;
1334
1335
0
    out = xmlSaveUri(uri);
1336
0
    if (out != NULL) {
1337
0
  fprintf(stream, "%s", (char *) out);
1338
0
  xmlFree(out);
1339
0
    }
1340
0
}
1341
1342
/**
1343
 * xmlCleanURI:
1344
 * @uri:  pointer to an xmlURI
1345
 *
1346
 * Make sure the xmlURI struct is free of content
1347
 */
1348
static void
1349
2.64M
xmlCleanURI(xmlURIPtr uri) {
1350
2.64M
    if (uri == NULL) return;
1351
1352
2.64M
    if (uri->scheme != NULL) xmlFree(uri->scheme);
1353
2.64M
    uri->scheme = NULL;
1354
2.64M
    if (uri->server != NULL) xmlFree(uri->server);
1355
2.64M
    uri->server = NULL;
1356
2.64M
    if (uri->user != NULL) xmlFree(uri->user);
1357
2.64M
    uri->user = NULL;
1358
2.64M
    if (uri->path != NULL) xmlFree(uri->path);
1359
2.64M
    uri->path = NULL;
1360
2.64M
    if (uri->fragment != NULL) xmlFree(uri->fragment);
1361
2.64M
    uri->fragment = NULL;
1362
2.64M
    if (uri->opaque != NULL) xmlFree(uri->opaque);
1363
2.64M
    uri->opaque = NULL;
1364
2.64M
    if (uri->authority != NULL) xmlFree(uri->authority);
1365
2.64M
    uri->authority = NULL;
1366
2.64M
    if (uri->query != NULL) xmlFree(uri->query);
1367
2.64M
    uri->query = NULL;
1368
2.64M
    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1369
2.64M
    uri->query_raw = NULL;
1370
2.64M
}
1371
1372
/**
1373
 * xmlFreeURI:
1374
 * @uri:  pointer to an xmlURI
1375
 *
1376
 * Free up the xmlURI struct
1377
 */
1378
void
1379
944k
xmlFreeURI(xmlURIPtr uri) {
1380
944k
    if (uri == NULL) return;
1381
1382
944k
    if (uri->scheme != NULL) xmlFree(uri->scheme);
1383
944k
    if (uri->server != NULL) xmlFree(uri->server);
1384
944k
    if (uri->user != NULL) xmlFree(uri->user);
1385
944k
    if (uri->path != NULL) xmlFree(uri->path);
1386
944k
    if (uri->fragment != NULL) xmlFree(uri->fragment);
1387
944k
    if (uri->opaque != NULL) xmlFree(uri->opaque);
1388
944k
    if (uri->authority != NULL) xmlFree(uri->authority);
1389
944k
    if (uri->query != NULL) xmlFree(uri->query);
1390
944k
    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1391
944k
    xmlFree(uri);
1392
944k
}
1393
1394
/************************************************************************
1395
 *                  *
1396
 *      Helper functions        *
1397
 *                  *
1398
 ************************************************************************/
1399
1400
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of '/' found after the first
 * segment are collapsed to a single '/' along the way.
 *
 * Normalization occurs directly on the string, no new allocation is done
 *
 * Returns 0, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /*
     * Skip all initial "/" chars to reach the beginning of the first
     * non-empty segment. A path made only of slashes is left alone.
     */
    for (cur = path; *cur == '/'; cur++)
        ;
    if (*cur == '\0')
        return(0);

    /* Everything seen so far is kept as is. */
    out = cur;

    /*
     * First pass, cases (c) and (d), one segment per iteration.
     */
    while (*cur != '\0') {
        if (*cur == '.') {
            /*
             * c) All occurrences of "./", where "." is a complete path
             *    segment, are removed from the buffer string. Slashes
             *    directly following it are dropped too.
             */
            if (cur[1] == '/') {
                cur += 2;
                while (*cur == '/')
                    cur++;
                continue;
            }
            /*
             * d) If the buffer string ends with "." as a complete path
             *    segment, that "." is removed.
             */
            if (cur[1] == '\0')
                break;
        }

        /* Otherwise keep the segment. */
        while ((*cur != '/') && (*cur != '\0'))
            *out++ = *cur++;
        if (*cur == '\0')
            break;

        /* normalize "//": cur is on a '/', move to the last of the run */
        while (cur[1] == '/')
            cur++;
        *out++ = *cur++;
    }
    *out = '\0';

    /* Back to the beginning of the first segment for the second pass. */
    for (cur = path; *cur == '/'; cur++)
        ;
    if (*cur == '\0')
        return(0);

    /*
     * Second pass, cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * The string is compacted every time something is removed, so a single
     * "current position" pointer is enough to implement the "iterative"
     * clause of (e).
     */
    for (;;) {
        char *next, *prev;
        int curIsParent, nextIsParent;

        /* "cur" is the first character of the segment being examined. */
        next = cur;
        while ((*next != '/') && (*next != '\0'))
            next++;

        /* The last segment cannot match: two segments are required. */
        if (*next == '\0')
            break;
        next++;

        /*
         * Keep this segment if it is ".." itself, or if the segment after
         * it is not "..".
         */
        curIsParent = (cur[0] == '.') && (cur[1] == '.') &&
                      (next == cur + 3);
        nextIsParent = (next[0] == '.') && (next[1] == '.') &&
                       ((next[2] == '/') || (next[2] == '\0'));
        if (curIsParent || !nextIsParent) {
            cur = next;
            continue;
        }

        /* "<segment>/.." at the very end: cut the string right here. */
        if (next[2] == '\0') {
            *cur = '\0';
            break;
        }

        /*
         * Remove "<segment>/../". Source and destination overlap, hence
         * memmove and not strcpy.
         */
        memmove(cur, next + 3, strlen(next + 3) + 1);

        /* If there are no previous segments, keep going from here. */
        prev = cur;
        while ((prev > path) && (*--prev == '/'))
            ;
        if (prev == path)
            continue;

        /*
         * "prev" is on the last character of the previous segment; move
         * back to its start and examine it again, so that input such as
         * "foo/bar/../.." also gets its "foo/.." removed.
         */
        cur = prev;
        while ((cur > path) && (cur[-1] != '/'))
            cur--;
    }
    /* "out" still designates the end reached by the first pass. */
    *out = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.') &&
               ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path)
            memmove(path, cur, strlen(cur) + 1);
    }

    return(0);
}
1588
1589
505k
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of 0-9, a-f or A-F, 0 otherwise
 */
static int is_hex(char c) {
    int digit = (c >= '0') && (c <= '9');
    int lower = (c >= 'a') && (c <= 'f');
    int upper = (c >= 'A') && (c <= 'F');

    return(digit || lower || upper);
}
1596
1597
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. Each
 * "%XX" sequence made of two hexadecimal digits is replaced by the byte it
 * encodes (no character encoding is involved); a '%' not followed by two
 * hexadecimal digits within the remaining length is copied unchanged.
 * The result can only be smaller than or the same size as the input, and
 * a terminating 0 is always appended, so a caller supplied @target must
 * have room for @len + 1 bytes.
 *
 * Returns @target if it was provided, otherwise a newly allocated copy of
 * the string, unescaped; NULL is returned only in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len < 0) return(NULL);

    ret = target;
    if (ret == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    out = ret;
    for (in = str; len > 0; ) {
        if ((len > 2) && (in[0] == '%') &&
            (is_hex(in[1])) && (is_hex(in[2]))) {
            int value = 0;
            int i;

            /* Fold the two hexadecimal digits into one byte value. */
            for (i = 1; i <= 2; i++) {
                char digit = in[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += (digit - '0');
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else if ((digit >= 'A') && (digit <= 'F'))
                    value += (digit - 'A') + 10;
            }
            *out++ = (char) value;
            in += 3;
            len -= 3;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1658
1659
/**
1660
 * xmlURIEscapeStr:
1661
 * @str:  string to escape
1662
 * @list: exception list string of chars not to escape
1663
 *
1664
 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1665
 * and the characters in the exception list.
1666
 *
1667
 * Returns a new escaped string or NULL in case of error.
1668
 */
1669
xmlChar *
1670
0
xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1671
0
    xmlChar *ret, ch;
1672
0
    xmlChar *temp;
1673
0
    const xmlChar *in;
1674
0
    int len, out;
1675
1676
0
    if (str == NULL)
1677
0
  return(NULL);
1678
0
    if (str[0] == 0)
1679
0
  return(xmlStrdup(str));
1680
0
    len = xmlStrlen(str);
1681
0
    if (!(len > 0)) return(NULL);
1682
1683
0
    len += 20;
1684
0
    ret = (xmlChar *) xmlMallocAtomic(len);
1685
0
    if (ret == NULL) {
1686
0
        xmlURIErrMemory("escaping URI value\n");
1687
0
  return(NULL);
1688
0
    }
1689
0
    in = (const xmlChar *) str;
1690
0
    out = 0;
1691
0
    while(*in != 0) {
1692
0
  if (len - out <= 3) {
1693
0
            temp = xmlSaveUriRealloc(ret, &len);
1694
0
      if (temp == NULL) {
1695
0
                xmlURIErrMemory("escaping URI value\n");
1696
0
    xmlFree(ret);
1697
0
    return(NULL);
1698
0
      }
1699
0
      ret = temp;
1700
0
  }
1701
1702
0
  ch = *in;
1703
1704
0
  if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1705
0
      unsigned char val;
1706
0
      ret[out++] = '%';
1707
0
      val = ch >> 4;
1708
0
      if (val <= 9)
1709
0
    ret[out++] = '0' + val;
1710
0
      else
1711
0
    ret[out++] = 'A' + val - 0xA;
1712
0
      val = ch & 0xF;
1713
0
      if (val <= 9)
1714
0
    ret[out++] = '0' + val;
1715
0
      else
1716
0
    ret[out++] = 'A' + val - 0xA;
1717
0
      in++;
1718
0
  } else {
1719
0
      ret[out++] = *in++;
1720
0
  }
1721
1722
0
    }
1723
0
    ret[out] = 0;
1724
0
    return(ret);
1725
0
}
1726
1727
/**
1728
 * xmlURIEscape:
1729
 * @str:  the string of the URI to escape
1730
 *
1731
 * Escaping routine, does not do validity checks !
1732
 * It will try to escape the chars needing this, but this is heuristic
1733
 * based it's impossible to be sure.
1734
 *
1735
 * Returns an copy of the string, but escaped
1736
 *
1737
 * 25 May 2001
1738
 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1739
 * according to RFC2396.
1740
 *   - Carl Douglas
1741
 */
1742
xmlChar *
1743
xmlURIEscape(const xmlChar * str)
1744
0
{
1745
0
    xmlChar *ret, *segment = NULL;
1746
0
    xmlURIPtr uri;
1747
0
    int ret2;
1748
1749
0
#define NULLCHK(p) if(!p) { \
1750
0
         xmlURIErrMemory("escaping URI value\n"); \
1751
0
         xmlFreeURI(uri); \
1752
0
         return NULL; } \
1753
0
1754
0
    if (str == NULL)
1755
0
        return (NULL);
1756
1757
0
    uri = xmlCreateURI();
1758
0
    if (uri != NULL) {
1759
  /*
1760
   * Allow escaping errors in the unescaped form
1761
   */
1762
0
        uri->cleanup = 1;
1763
0
        ret2 = xmlParseURIReference(uri, (const char *)str);
1764
0
        if (ret2) {
1765
0
            xmlFreeURI(uri);
1766
0
            return (NULL);
1767
0
        }
1768
0
    }
1769
1770
0
    if (!uri)
1771
0
        return NULL;
1772
1773
0
    ret = NULL;
1774
1775
0
    if (uri->scheme) {
1776
0
        segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1777
0
        NULLCHK(segment)
1778
0
        ret = xmlStrcat(ret, segment);
1779
0
        ret = xmlStrcat(ret, BAD_CAST ":");
1780
0
        xmlFree(segment);
1781
0
    }
1782
1783
0
    if (uri->authority) {
1784
0
        segment =
1785
0
            xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1786
0
        NULLCHK(segment)
1787
0
        ret = xmlStrcat(ret, BAD_CAST "//");
1788
0
        ret = xmlStrcat(ret, segment);
1789
0
        xmlFree(segment);
1790
0
    }
1791
1792
0
    if (uri->user) {
1793
0
        segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1794
0
        NULLCHK(segment)
1795
0
    ret = xmlStrcat(ret,BAD_CAST "//");
1796
0
        ret = xmlStrcat(ret, segment);
1797
0
        ret = xmlStrcat(ret, BAD_CAST "@");
1798
0
        xmlFree(segment);
1799
0
    }
1800
1801
0
    if (uri->server) {
1802
0
        segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1803
0
        NULLCHK(segment)
1804
0
    if (uri->user == NULL)
1805
0
    ret = xmlStrcat(ret, BAD_CAST "//");
1806
0
        ret = xmlStrcat(ret, segment);
1807
0
        xmlFree(segment);
1808
0
    }
1809
1810
0
    if (uri->port) {
1811
0
        xmlChar port[10];
1812
1813
0
        snprintf((char *) port, 10, "%d", uri->port);
1814
0
        ret = xmlStrcat(ret, BAD_CAST ":");
1815
0
        ret = xmlStrcat(ret, port);
1816
0
    }
1817
1818
0
    if (uri->path) {
1819
0
        segment =
1820
0
            xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1821
0
        NULLCHK(segment)
1822
0
        ret = xmlStrcat(ret, segment);
1823
0
        xmlFree(segment);
1824
0
    }
1825
1826
0
    if (uri->query_raw) {
1827
0
        ret = xmlStrcat(ret, BAD_CAST "?");
1828
0
        ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1829
0
    }
1830
0
    else if (uri->query) {
1831
0
        segment =
1832
0
            xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1833
0
        NULLCHK(segment)
1834
0
        ret = xmlStrcat(ret, BAD_CAST "?");
1835
0
        ret = xmlStrcat(ret, segment);
1836
0
        xmlFree(segment);
1837
0
    }
1838
1839
0
    if (uri->opaque) {
1840
0
        segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1841
0
        NULLCHK(segment)
1842
0
        ret = xmlStrcat(ret, segment);
1843
0
        xmlFree(segment);
1844
0
    }
1845
1846
0
    if (uri->fragment) {
1847
0
        segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1848
0
        NULLCHK(segment)
1849
0
        ret = xmlStrcat(ret, BAD_CAST "#");
1850
0
        ret = xmlStrcat(ret, segment);
1851
0
        xmlFree(segment);
1852
0
    }
1853
1854
0
    xmlFreeURI(uri);
1855
0
#undef NULLCHK
1856
1857
0
    return (ret);
1858
0
}
1859
1860
/************************************************************************
1861
 *                  *
1862
 *      Public functions        *
1863
 *                  *
1864
 ************************************************************************/
1865
1866
/**
1867
 * xmlBuildURI:
1868
 * @URI:  the URI instance found in the document
1869
 * @base:  the base value
1870
 *
1871
 * Computes he final URI of the reference done by checking that
1872
 * the given URI is valid, and building the final URI using the
1873
 * base URI. This is processed according to section 5.2 of the
1874
 * RFC 2396
1875
 *
1876
 * 5.2. Resolving Relative References to Absolute Form
1877
 *
1878
 * Returns a new URI string (to be freed by the caller) or NULL in case
1879
 *         of error.
1880
 */
1881
xmlChar *
1882
8.80k
xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1883
8.80k
    xmlChar *val = NULL;
1884
8.80k
    int ret, len, indx, cur, out;
1885
8.80k
    xmlURIPtr ref = NULL;
1886
8.80k
    xmlURIPtr bas = NULL;
1887
8.80k
    xmlURIPtr res = NULL;
1888
1889
    /*
1890
     * 1) The URI reference is parsed into the potential four components and
1891
     *    fragment identifier, as described in Section 4.3.
1892
     *
1893
     *    NOTE that a completely empty URI is treated by modern browsers
1894
     *    as a reference to "." rather than as a synonym for the current
1895
     *    URI.  Should we do that here?
1896
     */
1897
8.80k
    if (URI == NULL)
1898
0
  ret = -1;
1899
8.80k
    else {
1900
8.80k
  if (*URI) {
1901
7.93k
      ref = xmlCreateURI();
1902
7.93k
      if (ref == NULL)
1903
0
    goto done;
1904
7.93k
      ret = xmlParseURIReference(ref, (const char *) URI);
1905
7.93k
  }
1906
875
  else
1907
875
      ret = 0;
1908
8.80k
    }
1909
8.80k
    if (ret != 0)
1910
1.54k
  goto done;
1911
7.26k
    if ((ref != NULL) && (ref->scheme != NULL)) {
1912
  /*
1913
   * The URI is absolute don't modify.
1914
   */
1915
46
  val = xmlStrdup(URI);
1916
46
  goto done;
1917
46
    }
1918
7.21k
    if (base == NULL)
1919
2.18k
  ret = -1;
1920
5.03k
    else {
1921
5.03k
  bas = xmlCreateURI();
1922
5.03k
  if (bas == NULL)
1923
0
      goto done;
1924
5.03k
  ret = xmlParseURIReference(bas, (const char *) base);
1925
5.03k
    }
1926
7.21k
    if (ret != 0) {
1927
2.18k
  if (ref)
1928
1.90k
      val = xmlSaveUri(ref);
1929
2.18k
  goto done;
1930
2.18k
    }
1931
5.03k
    if (ref == NULL) {
1932
  /*
1933
   * the base fragment must be ignored
1934
   */
1935
596
  if (bas->fragment != NULL) {
1936
0
      xmlFree(bas->fragment);
1937
0
      bas->fragment = NULL;
1938
0
  }
1939
596
  val = xmlSaveUri(bas);
1940
596
  goto done;
1941
596
    }
1942
1943
    /*
1944
     * 2) If the path component is empty and the scheme, authority, and
1945
     *    query components are undefined, then it is a reference to the
1946
     *    current document and we are done.  Otherwise, the reference URI's
1947
     *    query and fragment components are defined as found (or not found)
1948
     *    within the URI reference and not inherited from the base URI.
1949
     *
1950
     *    NOTE that in modern browsers, the parsing differs from the above
1951
     *    in the following aspect:  the query component is allowed to be
1952
     *    defined while still treating this as a reference to the current
1953
     *    document.
1954
     */
1955
4.43k
    res = xmlCreateURI();
1956
4.43k
    if (res == NULL)
1957
0
  goto done;
1958
4.43k
    if ((ref->scheme == NULL) && (ref->path == NULL) &&
1959
1.62k
  ((ref->authority == NULL) && (ref->server == NULL))) {
1960
354
  if (bas->scheme != NULL)
1961
0
      res->scheme = xmlMemStrdup(bas->scheme);
1962
354
  if (bas->authority != NULL)
1963
0
      res->authority = xmlMemStrdup(bas->authority);
1964
354
  else if ((bas->server != NULL) || (bas->port == -1)) {
1965
0
      if (bas->server != NULL)
1966
0
    res->server = xmlMemStrdup(bas->server);
1967
0
      if (bas->user != NULL)
1968
0
    res->user = xmlMemStrdup(bas->user);
1969
0
      res->port = bas->port;
1970
0
  }
1971
354
  if (bas->path != NULL)
1972
51
      res->path = xmlMemStrdup(bas->path);
1973
354
  if (ref->query_raw != NULL)
1974
90
      res->query_raw = xmlMemStrdup (ref->query_raw);
1975
264
  else if (ref->query != NULL)
1976
0
      res->query = xmlMemStrdup(ref->query);
1977
264
  else if (bas->query_raw != NULL)
1978
0
      res->query_raw = xmlMemStrdup(bas->query_raw);
1979
264
  else if (bas->query != NULL)
1980
0
      res->query = xmlMemStrdup(bas->query);
1981
354
  if (ref->fragment != NULL)
1982
275
      res->fragment = xmlMemStrdup(ref->fragment);
1983
354
  goto step_7;
1984
354
    }
1985
1986
    /*
1987
     * 3) If the scheme component is defined, indicating that the reference
1988
     *    starts with a scheme name, then the reference is interpreted as an
1989
     *    absolute URI and we are done.  Otherwise, the reference URI's
1990
     *    scheme is inherited from the base URI's scheme component.
1991
     */
1992
4.08k
    if (ref->scheme != NULL) {
1993
0
  val = xmlSaveUri(ref);
1994
0
  goto done;
1995
0
    }
1996
4.08k
    if (bas->scheme != NULL)
1997
0
  res->scheme = xmlMemStrdup(bas->scheme);
1998
1999
4.08k
    if (ref->query_raw != NULL)
2000
575
  res->query_raw = xmlMemStrdup(ref->query_raw);
2001
3.50k
    else if (ref->query != NULL)
2002
0
  res->query = xmlMemStrdup(ref->query);
2003
4.08k
    if (ref->fragment != NULL)
2004
1.23k
  res->fragment = xmlMemStrdup(ref->fragment);
2005
2006
    /*
2007
     * 4) If the authority component is defined, then the reference is a
2008
     *    network-path and we skip to step 7.  Otherwise, the reference
2009
     *    URI's authority is inherited from the base URI's authority
2010
     *    component, which will also be undefined if the URI scheme does not
2011
     *    use an authority component.
2012
     */
2013
4.08k
    if ((ref->authority != NULL) || (ref->server != NULL)) {
2014
1.60k
  if (ref->authority != NULL)
2015
0
      res->authority = xmlMemStrdup(ref->authority);
2016
1.60k
  else {
2017
1.60k
      res->server = xmlMemStrdup(ref->server);
2018
1.60k
      if (ref->user != NULL)
2019
970
    res->user = xmlMemStrdup(ref->user);
2020
1.60k
            res->port = ref->port;
2021
1.60k
  }
2022
1.60k
  if (ref->path != NULL)
2023
336
      res->path = xmlMemStrdup(ref->path);
2024
1.60k
  goto step_7;
2025
1.60k
    }
2026
2.47k
    if (bas->authority != NULL)
2027
0
  res->authority = xmlMemStrdup(bas->authority);
2028
2.47k
    else if ((bas->server != NULL) || (bas->port == -1)) {
2029
0
  if (bas->server != NULL)
2030
0
      res->server = xmlMemStrdup(bas->server);
2031
0
  if (bas->user != NULL)
2032
0
      res->user = xmlMemStrdup(bas->user);
2033
0
  res->port = bas->port;
2034
0
    }
2035
2036
    /*
2037
     * 5) If the path component begins with a slash character ("/"), then
2038
     *    the reference is an absolute-path and we skip to step 7.
2039
     */
2040
2.47k
    if ((ref->path != NULL) && (ref->path[0] == '/')) {
2041
361
  res->path = xmlMemStrdup(ref->path);
2042
361
  goto step_7;
2043
361
    }
2044
2045
2046
    /*
2047
     * 6) If this step is reached, then we are resolving a relative-path
2048
     *    reference.  The relative path needs to be merged with the base
2049
     *    URI's path.  Although there are many ways to do this, we will
2050
     *    describe a simple method using a separate string buffer.
2051
     *
2052
     * Allocate a buffer large enough for the result string.
2053
     */
2054
2.11k
    len = 2; /* extra / and 0 */
2055
2.11k
    if (ref->path != NULL)
2056
2.11k
  len += strlen(ref->path);
2057
2.11k
    if (bas->path != NULL)
2058
243
  len += strlen(bas->path);
2059
2.11k
    res->path = (char *) xmlMallocAtomic(len);
2060
2.11k
    if (res->path == NULL) {
2061
0
        xmlURIErrMemory("resolving URI against base\n");
2062
0
  goto done;
2063
0
    }
2064
2.11k
    res->path[0] = 0;
2065
2066
    /*
2067
     * a) All but the last segment of the base URI's path component is
2068
     *    copied to the buffer.  In other words, any characters after the
2069
     *    last (right-most) slash character, if any, are excluded.
2070
     */
2071
2.11k
    cur = 0;
2072
2.11k
    out = 0;
2073
2.11k
    if (bas->path != NULL) {
2074
972
  while (bas->path[cur] != 0) {
2075
9.96k
      while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2076
8.99k
    cur++;
2077
972
      if (bas->path[cur] == 0)
2078
243
    break;
2079
2080
729
      cur++;
2081
4.37k
      while (out < cur) {
2082
3.64k
    res->path[out] = bas->path[out];
2083
3.64k
    out++;
2084
3.64k
      }
2085
729
  }
2086
243
    }
2087
2.11k
    res->path[out] = 0;
2088
2089
    /*
2090
     * b) The reference's path component is appended to the buffer
2091
     *    string.
2092
     */
2093
2.11k
    if (ref->path != NULL && ref->path[0] != 0) {
2094
2.10k
  indx = 0;
2095
  /*
2096
   * Ensure the path includes a '/'
2097
   */
2098
2.10k
  if ((out == 0) && (bas->server != NULL))
2099
0
      res->path[out++] = '/';
2100
152k
  while (ref->path[indx] != 0) {
2101
150k
      res->path[out++] = ref->path[indx++];
2102
150k
  }
2103
2.10k
    }
2104
2.11k
    res->path[out] = 0;
2105
2106
    /*
2107
     * Steps c) to h) are really path normalization steps
2108
     */
2109
2.11k
    xmlNormalizeURIPath(res->path);
2110
2111
4.43k
step_7:
2112
2113
    /*
2114
     * 7) The resulting URI components, including any inherited from the
2115
     *    base URI, are recombined to give the absolute form of the URI
2116
     *    reference.
2117
     */
2118
4.43k
    val = xmlSaveUri(res);
2119
2120
8.80k
done:
2121
8.80k
    if (ref != NULL)
2122
7.93k
  xmlFreeURI(ref);
2123
8.80k
    if (bas != NULL)
2124
5.03k
  xmlFreeURI(bas);
2125
8.80k
    if (res != NULL)
2126
4.43k
  xmlFreeURI(res);
2127
8.80k
    return(val);
2128
4.43k
}
2129
2130
/**
2131
 * xmlBuildRelativeURI:
2132
 * @URI:  the URI reference under consideration
2133
 * @base:  the base value
2134
 *
2135
 * Expresses the URI of the reference in terms relative to the
2136
 * base.  Some examples of this operation include:
2137
 *     base = "http://site1.com/docs/book1.html"
2138
 *        URI input                        URI returned
2139
 *     docs/pic1.gif                    pic1.gif
2140
 *     docs/img/pic1.gif                img/pic1.gif
2141
 *     img/pic1.gif                     ../img/pic1.gif
2142
 *     http://site1.com/docs/pic1.gif   pic1.gif
2143
 *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2144
 *
2145
 *     base = "docs/book1.html"
2146
 *        URI input                        URI returned
2147
 *     docs/pic1.gif                    pic1.gif
2148
 *     docs/img/pic1.gif                img/pic1.gif
2149
 *     img/pic1.gif                     ../img/pic1.gif
2150
 *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2151
 *
2152
 *
2153
 * Note: if the URI reference is really wierd or complicated, it may be
2154
 *       worthwhile to first convert it into a "nice" one by calling
2155
 *       xmlBuildURI (using 'base') before calling this routine,
2156
 *       since this routine (for reasonable efficiency) assumes URI has
2157
 *       already been through some validation.
2158
 *
2159
 * Returns a new URI string (to be freed by the caller) or NULL in case
2160
 * error.
2161
 */
2162
xmlChar *
2163
xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2164
0
{
2165
0
    xmlChar *val = NULL;
2166
0
    int ret;
2167
0
    int ix;
2168
0
    int nbslash = 0;
2169
0
    int len;
2170
0
    xmlURIPtr ref = NULL;
2171
0
    xmlURIPtr bas = NULL;
2172
0
    xmlChar *bptr, *uptr, *vptr;
2173
0
    int remove_path = 0;
2174
2175
0
    if ((URI == NULL) || (*URI == 0))
2176
0
  return NULL;
2177
2178
    /*
2179
     * First parse URI into a standard form
2180
     */
2181
0
    ref = xmlCreateURI ();
2182
0
    if (ref == NULL)
2183
0
  return NULL;
2184
    /* If URI not already in "relative" form */
2185
0
    if (URI[0] != '.') {
2186
0
  ret = xmlParseURIReference (ref, (const char *) URI);
2187
0
  if (ret != 0)
2188
0
      goto done;   /* Error in URI, return NULL */
2189
0
    } else
2190
0
  ref->path = (char *)xmlStrdup(URI);
2191
2192
    /*
2193
     * Next parse base into the same standard form
2194
     */
2195
0
    if ((base == NULL) || (*base == 0)) {
2196
0
  val = xmlStrdup (URI);
2197
0
  goto done;
2198
0
    }
2199
0
    bas = xmlCreateURI ();
2200
0
    if (bas == NULL)
2201
0
  goto done;
2202
0
    if (base[0] != '.') {
2203
0
  ret = xmlParseURIReference (bas, (const char *) base);
2204
0
  if (ret != 0)
2205
0
      goto done;   /* Error in base, return NULL */
2206
0
    } else
2207
0
  bas->path = (char *)xmlStrdup(base);
2208
2209
    /*
2210
     * If the scheme / server on the URI differs from the base,
2211
     * just return the URI
2212
     */
2213
0
    if ((ref->scheme != NULL) &&
2214
0
  ((bas->scheme == NULL) ||
2215
0
   (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2216
0
   (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2217
0
  val = xmlStrdup (URI);
2218
0
  goto done;
2219
0
    }
2220
0
    if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2221
0
  val = xmlStrdup(BAD_CAST "");
2222
0
  goto done;
2223
0
    }
2224
0
    if (bas->path == NULL) {
2225
0
  val = xmlStrdup((xmlChar *)ref->path);
2226
0
  goto done;
2227
0
    }
2228
0
    if (ref->path == NULL) {
2229
0
        ref->path = (char *) "/";
2230
0
  remove_path = 1;
2231
0
    }
2232
2233
    /*
2234
     * At this point (at last!) we can compare the two paths
2235
     *
2236
     * First we take care of the special case where either of the
2237
     * two path components may be missing (bug 316224)
2238
     */
2239
0
    if (bas->path == NULL) {
2240
0
  if (ref->path != NULL) {
2241
0
      uptr = (xmlChar *) ref->path;
2242
0
      if (*uptr == '/')
2243
0
    uptr++;
2244
      /* exception characters from xmlSaveUri */
2245
0
      val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2246
0
  }
2247
0
  goto done;
2248
0
    }
2249
0
    bptr = (xmlChar *)bas->path;
2250
0
    if (ref->path == NULL) {
2251
0
  for (ix = 0; bptr[ix] != 0; ix++) {
2252
0
      if (bptr[ix] == '/')
2253
0
    nbslash++;
2254
0
  }
2255
0
  uptr = NULL;
2256
0
  len = 1;  /* this is for a string terminator only */
2257
0
    } else {
2258
0
        xmlChar *rptr = (xmlChar *) ref->path;
2259
0
        int pos = 0;
2260
2261
        /*
2262
         * Next we compare the two strings and find where they first differ
2263
         */
2264
0
  if ((*rptr == '.') && (rptr[1] == '/'))
2265
0
            rptr += 2;
2266
0
  if ((*bptr == '.') && (bptr[1] == '/'))
2267
0
            bptr += 2;
2268
0
  else if ((*bptr == '/') && (*rptr != '/'))
2269
0
      bptr++;
2270
0
  while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2271
0
      pos++;
2272
2273
0
  if (bptr[pos] == rptr[pos]) {
2274
0
      val = xmlStrdup(BAD_CAST "");
2275
0
      goto done;    /* (I can't imagine why anyone would do this) */
2276
0
  }
2277
2278
  /*
2279
   * In URI, "back up" to the last '/' encountered.  This will be the
2280
   * beginning of the "unique" suffix of URI
2281
   */
2282
0
  ix = pos;
2283
0
  if ((rptr[ix] == '/') && (ix > 0))
2284
0
      ix--;
2285
0
  else if ((rptr[ix] == 0) && (ix > 1) && (rptr[ix - 1] == '/'))
2286
0
      ix -= 2;
2287
0
  for (; ix > 0; ix--) {
2288
0
      if (rptr[ix] == '/')
2289
0
    break;
2290
0
  }
2291
0
  if (ix == 0) {
2292
0
      uptr = (xmlChar *)rptr;
2293
0
  } else {
2294
0
      ix++;
2295
0
      uptr = (xmlChar *)&rptr[ix];
2296
0
  }
2297
2298
  /*
2299
   * In base, count the number of '/' from the differing point
2300
   */
2301
0
  if (bptr[pos] != rptr[pos]) {/* check for trivial URI == base */
2302
0
      for (; bptr[ix] != 0; ix++) {
2303
0
    if (bptr[ix] == '/')
2304
0
        nbslash++;
2305
0
      }
2306
0
  }
2307
0
  len = xmlStrlen (uptr) + 1;
2308
0
    }
2309
2310
0
    if (nbslash == 0) {
2311
0
  if (uptr != NULL)
2312
      /* exception characters from xmlSaveUri */
2313
0
      val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2314
0
  goto done;
2315
0
    }
2316
2317
    /*
2318
     * Allocate just enough space for the returned string -
2319
     * length of the remainder of the URI, plus enough space
2320
     * for the "../" groups, plus one for the terminator
2321
     */
2322
0
    val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2323
0
    if (val == NULL) {
2324
0
        xmlURIErrMemory("building relative URI\n");
2325
0
  goto done;
2326
0
    }
2327
0
    vptr = val;
2328
    /*
2329
     * Put in as many "../" as needed
2330
     */
2331
0
    for (; nbslash>0; nbslash--) {
2332
0
  *vptr++ = '.';
2333
0
  *vptr++ = '.';
2334
0
  *vptr++ = '/';
2335
0
    }
2336
    /*
2337
     * Finish up with the end of the URI
2338
     */
2339
0
    if (uptr != NULL) {
2340
0
        if ((vptr > val) && (len > 0) &&
2341
0
      (uptr[0] == '/') && (vptr[-1] == '/')) {
2342
0
      memcpy (vptr, uptr + 1, len - 1);
2343
0
      vptr[len - 2] = 0;
2344
0
  } else {
2345
0
      memcpy (vptr, uptr, len);
2346
0
      vptr[len - 1] = 0;
2347
0
  }
2348
0
    } else {
2349
0
  vptr[len - 1] = 0;
2350
0
    }
2351
2352
    /* escape the freshly-built path */
2353
0
    vptr = val;
2354
  /* exception characters from xmlSaveUri */
2355
0
    val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2356
0
    xmlFree(vptr);
2357
2358
0
done:
2359
    /*
2360
     * Free the working variables
2361
     */
2362
0
    if (remove_path != 0)
2363
0
        ref->path = NULL;
2364
0
    if (ref != NULL)
2365
0
  xmlFreeURI (ref);
2366
0
    if (bas != NULL)
2367
0
  xmlFreeURI (bas);
2368
2369
0
    return val;
2370
0
}
2371
2372
/**
2373
 * xmlCanonicPath:
2374
 * @path:  the resource locator in a filesystem notation
2375
 *
2376
 * Constructs a canonic path from the specified path.
2377
 *
2378
 * Returns a new canonic path, or a duplicate of the path parameter if the
2379
 * construction fails. The caller is responsible for freeing the memory occupied
2380
 * by the returned string. If there is insufficient memory available, or the
2381
 * argument is NULL, the function returns NULL.
2382
 */
2383
#define IS_WINDOWS_PATH(p)          \
2384
  ((p != NULL) &&           \
2385
   (((p[0] >= 'a') && (p[0] <= 'z')) ||     \
2386
    ((p[0] >= 'A') && (p[0] <= 'Z'))) &&      \
2387
   (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2388
xmlChar *
2389
xmlCanonicPath(const xmlChar *path)
2390
98.5k
{
2391
/*
2392
 * For Windows implementations, additional work needs to be done to
2393
 * replace backslashes in pathnames with "forward slashes"
2394
 */
2395
#if defined(_WIN32) && !defined(__CYGWIN__)
2396
    int len = 0;
2397
    char *p = NULL;
2398
#endif
2399
98.5k
    xmlURIPtr uri;
2400
98.5k
    xmlChar *ret;
2401
98.5k
    const xmlChar *absuri;
2402
2403
98.5k
    if (path == NULL)
2404
0
  return(NULL);
2405
2406
#if defined(_WIN32)
2407
    /*
2408
     * We must not change the backslashes to slashes if the the path
2409
     * starts with \\?\
2410
     * Those paths can be up to 32k characters long.
2411
     * Was added specifically for OpenOffice, those paths can't be converted
2412
     * to URIs anyway.
2413
     */
2414
    if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2415
        (path[3] == '\\') )
2416
  return xmlStrdup((const xmlChar *) path);
2417
#endif
2418
2419
  /* sanitize filename starting with // so it can be used as URI */
2420
98.5k
    if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2421
0
        path++;
2422
2423
98.5k
    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2424
98.5k
  xmlFreeURI(uri);
2425
98.5k
  return xmlStrdup(path);
2426
98.5k
    }
2427
2428
    /* Check if this is an "absolute uri" */
2429
0
    absuri = xmlStrstr(path, BAD_CAST "://");
2430
0
    if (absuri != NULL) {
2431
0
        int l, j;
2432
0
  unsigned char c;
2433
0
  xmlChar *escURI;
2434
2435
        /*
2436
   * this looks like an URI where some parts have not been
2437
   * escaped leading to a parsing problem.  Check that the first
2438
   * part matches a protocol.
2439
   */
2440
0
  l = absuri - path;
2441
  /* Bypass if first part (part before the '://') is > 20 chars */
2442
0
  if ((l <= 0) || (l > 20))
2443
0
      goto path_processing;
2444
  /* Bypass if any non-alpha characters are present in first part */
2445
0
  for (j = 0;j < l;j++) {
2446
0
      c = path[j];
2447
0
      if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2448
0
          goto path_processing;
2449
0
  }
2450
2451
  /* Escape all except the characters specified in the supplied path */
2452
0
        escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2453
0
  if (escURI != NULL) {
2454
      /* Try parsing the escaped path */
2455
0
      uri = xmlParseURI((const char *) escURI);
2456
      /* If successful, return the escaped string */
2457
0
      if (uri != NULL) {
2458
0
          xmlFreeURI(uri);
2459
0
    return escURI;
2460
0
      }
2461
0
            xmlFree(escURI);
2462
0
  }
2463
0
    }
2464
2465
0
path_processing:
2466
/* For Windows implementations, replace backslashes with 'forward slashes' */
2467
#if defined(_WIN32) && !defined(__CYGWIN__)
2468
    /*
2469
     * Create a URI structure
2470
     */
2471
    uri = xmlCreateURI();
2472
    if (uri == NULL) {    /* Guard against 'out of memory' */
2473
        return(NULL);
2474
    }
2475
2476
    len = xmlStrlen(path);
2477
    if ((len > 2) && IS_WINDOWS_PATH(path)) {
2478
        /* make the scheme 'file' */
2479
  uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2480
  /* allocate space for leading '/' + path + string terminator */
2481
  uri->path = xmlMallocAtomic(len + 2);
2482
  if (uri->path == NULL) {
2483
      xmlFreeURI(uri);  /* Guard agains 'out of memory' */
2484
      return(NULL);
2485
  }
2486
  /* Put in leading '/' plus path */
2487
  uri->path[0] = '/';
2488
  p = uri->path + 1;
2489
  strncpy(p, (char *) path, len + 1);
2490
    } else {
2491
  uri->path = (char *) xmlStrdup(path);
2492
  if (uri->path == NULL) {
2493
      xmlFreeURI(uri);
2494
      return(NULL);
2495
  }
2496
  p = uri->path;
2497
    }
2498
    /* Now change all occurences of '\' to '/' */
2499
    while (*p != '\0') {
2500
  if (*p == '\\')
2501
      *p = '/';
2502
  p++;
2503
    }
2504
2505
    if (uri->scheme == NULL) {
2506
  ret = xmlStrdup((const xmlChar *) uri->path);
2507
    } else {
2508
  ret = xmlSaveUri(uri);
2509
    }
2510
2511
    xmlFreeURI(uri);
2512
#else
2513
0
    ret = xmlStrdup((const xmlChar *) path);
2514
0
#endif
2515
0
    return(ret);
2516
0
}
2517
2518
/**
2519
 * xmlPathToURI:
2520
 * @path:  the resource locator in a filesystem notation
2521
 *
2522
 * Constructs an URI expressing the existing path
2523
 *
2524
 * Returns a new URI, or a duplicate of the path parameter if the
2525
 * construction fails. The caller is responsible for freeing the memory
2526
 * occupied by the returned string. If there is insufficient memory available,
2527
 * or the argument is NULL, the function returns NULL.
2528
 */
2529
xmlChar *
2530
xmlPathToURI(const xmlChar *path)
2531
96.0k
{
2532
96.0k
    xmlURIPtr uri;
2533
96.0k
    xmlURI temp;
2534
96.0k
    xmlChar *ret, *cal;
2535
2536
96.0k
    if (path == NULL)
2537
0
        return(NULL);
2538
2539
96.0k
    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2540
96.0k
  xmlFreeURI(uri);
2541
96.0k
  return xmlStrdup(path);
2542
96.0k
    }
2543
0
    cal = xmlCanonicPath(path);
2544
0
    if (cal == NULL)
2545
0
        return(NULL);
2546
#if defined(_WIN32) && !defined(__CYGWIN__)
2547
    /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2548
       If 'cal' is a valid URI allready then we are done here, as continuing would make
2549
       it invalid. */
2550
    if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2551
  xmlFreeURI(uri);
2552
  return cal;
2553
    }
2554
    /* 'cal' can contain a relative path with backslashes. If that is processed
2555
       by xmlSaveURI, they will be escaped and the external entity loader machinery
2556
       will fail. So convert them to slashes. Misuse 'ret' for walking. */
2557
    ret = cal;
2558
    while (*ret != '\0') {
2559
  if (*ret == '\\')
2560
      *ret = '/';
2561
  ret++;
2562
    }
2563
#endif
2564
0
    memset(&temp, 0, sizeof(temp));
2565
0
    temp.path = (char *) cal;
2566
0
    ret = xmlSaveUri(&temp);
2567
0
    xmlFree(cal);
2568
0
    return(ret);
2569
0
}
2570
#define bottom_uri
2571
#include "elfgcchack.h"