Coverage Report

Created: 2022-06-08 06:16

/src/libxml2/uri.c
Line
Count
Source (jump to first uncovered line)
1
/**
2
 * uri.c: set of generic URI related routines
3
 *
4
 * Reference: RFCs 3986, 2732 and 2373
5
 *
6
 * See Copyright for the status of this software.
7
 *
8
 * daniel@veillard.com
9
 */
10
11
#define IN_LIBXML
12
#include "libxml.h"
13
14
#include <limits.h>
15
#include <string.h>
16
17
#include <libxml/xmlmemory.h>
18
#include <libxml/uri.h>
19
#include <libxml/globals.h>
20
#include <libxml/xmlerror.h>
21
22
/**
 * MAX_URI_LENGTH:
 *
 * The URI syntax in the RFC referenced above has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually the maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value inside any surrounding expression (division, modulo, ...).
 */
#define MAX_URI_LENGTH (1024 * 1024)
35
36
static void
37
xmlURIErrMemory(const char *extra)
38
9
{
39
9
    if (extra)
40
9
        __xmlRaiseError(NULL, NULL, NULL,
41
9
                        NULL, NULL, XML_FROM_URI,
42
9
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
43
9
                        extra, NULL, NULL, 0, 0,
44
9
                        "Memory allocation failed : %s\n", extra);
45
0
    else
46
0
        __xmlRaiseError(NULL, NULL, NULL,
47
0
                        NULL, NULL, XML_FROM_URI,
48
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
49
0
                        NULL, NULL, NULL, 0, 0,
50
0
                        "Memory allocation failed\n");
51
9
}
52
53
static void xmlCleanURI(xmlURIPtr uri);
54
55
/*
 * Character classes from the older RFC 2396 grammar, used by the legacy
 * handling code. Every macro below evaluates its argument more than
 * once, so arguments must be free of side effects.
 *
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" | "b" | "c" | ... | "y" | "z"
 */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | ... | "Y" | "Z"
 */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/* Drop any IS_DIGIT definition already in effect before defining ours. */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') || ((x) == '!') ||    \
     ((x) == '~') || ((x) == '*') || ((x) == '\'') || ((x) == '(') ||   \
     ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the other legacy macros, this one takes a pointer to the
 * character to test rather than the character itself.
 */
#define IS_UNWISE(p)                                                    \
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||                 \
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||                \
     (*(p) == ']') || (*(p) == '`'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
122
123
/*
 * Advance the pointer lvalue p to the next character: when it points at
 * a '%' the three bytes of the escape sequence are skipped at once,
 * otherwise it moves forward by one byte. The macro itself does not
 * check that two more bytes follow the '%'.
 *
 * The argument is parenthesized so that an lvalue expression such as
 * *pp is advanced itself (rather than pp being incremented).
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)
128
129
/*
130
 * Productions from the spec.
131
 *
132
 *    authority     = server | reg_name
133
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
134
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
135
 *
136
 * path          = [ abs_path | opaque_part ]
137
 */
138
139
77.3k
/*
 * Call xmlStrndup() on a char string: the input is cast to
 * (const xmlChar *) and the result back to (char *). The whole expansion
 * is parenthesized so it can be used safely inside larger expressions.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
140
141
/************************************************************************
142
 *                  *
143
 *                         RFC 3986 parser        *
144
 *                  *
145
 ************************************************************************/
146
147
1.54M
/*
 * RFC 3986 character classes. Each macro takes a pointer to the
 * character to test and evaluates it several times, so the argument
 * must be free of side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following bytes are only inspected when the first one is '%'
 * (and the third only when the second is a hex digit), so a string
 * terminator is never stepped over. The argument is parenthesized
 * before the offsets are added so that any pointer expression works.
 */
#define ISA_PCT_ENCODED(p)                                              \
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
      ((*(p) == ':')) || ((*(p) == '@')))
196
197
/**
198
 * xmlParse3986Scheme:
199
 * @uri:  pointer to an URI structure
200
 * @str:  pointer to the string to analyze
201
 *
202
 * Parse an URI scheme
203
 *
204
 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
205
 *
206
 * Returns 0 or the error code
207
 */
208
static int
209
141k
xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
210
141k
    const char *cur;
211
212
141k
    if (str == NULL)
213
0
  return(-1);
214
215
141k
    cur = *str;
216
141k
    if (!ISA_ALPHA(cur))
217
75.3k
  return(2);
218
65.8k
    cur++;
219
147k
    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
220
147k
           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
221
65.8k
    if (uri != NULL) {
222
65.8k
  if (uri->scheme != NULL) xmlFree(uri->scheme);
223
65.8k
  uri->scheme = STRNDUP(*str, cur - *str);
224
65.8k
    }
225
65.8k
    *str = cur;
226
65.8k
    return(0);
227
141k
}
228
229
/**
230
 * xmlParse3986Fragment:
231
 * @uri:  pointer to an URI structure
232
 * @str:  pointer to the string to analyze
233
 *
234
 * Parse the query part of an URI
235
 *
236
 * fragment      = *( pchar / "/" / "?" )
237
 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
238
 *       in the fragment identifier but this is used very broadly for
239
 *       xpointer scheme selection, so we are allowing it here to not break
240
 *       for example all the DocBook processing chains.
241
 *
242
 * Returns 0 or the error code
243
 */
244
static int
245
xmlParse3986Fragment(xmlURIPtr uri, const char **str)
246
17.4k
{
247
17.4k
    const char *cur;
248
249
17.4k
    if (str == NULL)
250
0
        return (-1);
251
252
17.4k
    cur = *str;
253
254
146k
    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
255
146k
           (*cur == '[') || (*cur == ']') ||
256
146k
           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
257
128k
        NEXT(cur);
258
17.4k
    if (uri != NULL) {
259
17.4k
        if (uri->fragment != NULL)
260
0
            xmlFree(uri->fragment);
261
17.4k
  if (uri->cleanup & 2)
262
0
      uri->fragment = STRNDUP(*str, cur - *str);
263
17.4k
  else
264
17.4k
      uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
265
17.4k
    }
266
17.4k
    *str = cur;
267
17.4k
    return (0);
268
17.4k
}
269
270
/**
271
 * xmlParse3986Query:
272
 * @uri:  pointer to an URI structure
273
 * @str:  pointer to the string to analyze
274
 *
275
 * Parse the query part of an URI
276
 *
277
 * query = *uric
278
 *
279
 * Returns 0 or the error code
280
 */
281
static int
282
xmlParse3986Query(xmlURIPtr uri, const char **str)
283
11.4k
{
284
11.4k
    const char *cur;
285
286
11.4k
    if (str == NULL)
287
0
        return (-1);
288
289
11.4k
    cur = *str;
290
291
80.2k
    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
292
80.2k
           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
293
68.7k
        NEXT(cur);
294
11.4k
    if (uri != NULL) {
295
11.4k
        if (uri->query != NULL)
296
0
            xmlFree(uri->query);
297
11.4k
  if (uri->cleanup & 2)
298
0
      uri->query = STRNDUP(*str, cur - *str);
299
11.4k
  else
300
11.4k
      uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
301
302
  /* Save the raw bytes of the query as well.
303
   * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
304
   */
305
11.4k
  if (uri->query_raw != NULL)
306
0
      xmlFree (uri->query_raw);
307
11.4k
  uri->query_raw = STRNDUP (*str, cur - *str);
308
11.4k
    }
309
11.4k
    *str = cur;
310
11.4k
    return (0);
311
11.4k
}
312
313
/**
314
 * xmlParse3986Port:
315
 * @uri:  pointer to an URI structure
316
 * @str:  the string to analyze
317
 *
318
 * Parse a port part and fills in the appropriate fields
319
 * of the @uri structure
320
 *
321
 * port          = *DIGIT
322
 *
323
 * Returns 0 or the error code
324
 */
325
static int
326
xmlParse3986Port(xmlURIPtr uri, const char **str)
327
5.90k
{
328
5.90k
    const char *cur = *str;
329
5.90k
    int port = 0;
330
331
5.90k
    if (ISA_DIGIT(cur)) {
332
31.6k
  while (ISA_DIGIT(cur)) {
333
29.2k
            int digit = *cur - '0';
334
335
29.2k
            if (port > INT_MAX / 10)
336
1.11k
                return(1);
337
28.1k
            port *= 10;
338
28.1k
            if (port > INT_MAX - digit)
339
756
                return(1);
340
27.3k
      port += digit;
341
342
27.3k
      cur++;
343
27.3k
  }
344
2.35k
  if (uri != NULL)
345
2.35k
      uri->port = port;
346
2.35k
  *str = cur;
347
2.35k
  return(0);
348
4.22k
    }
349
1.67k
    return(1);
350
5.90k
}
351
352
/**
353
 * xmlParse3986Userinfo:
354
 * @uri:  pointer to an URI structure
355
 * @str:  the string to analyze
356
 *
357
 * Parse an user information part and fills in the appropriate fields
358
 * of the @uri structure
359
 *
360
 * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
361
 *
362
 * Returns 0 or the error code
363
 */
364
static int
365
xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
366
30.5k
{
367
30.5k
    const char *cur;
368
369
30.5k
    cur = *str;
370
199k
    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
371
199k
           ISA_SUB_DELIM(cur) || (*cur == ':'))
372
169k
  NEXT(cur);
373
30.5k
    if (*cur == '@') {
374
2.16k
  if (uri != NULL) {
375
2.16k
      if (uri->user != NULL) xmlFree(uri->user);
376
2.16k
      if (uri->cleanup & 2)
377
0
    uri->user = STRNDUP(*str, cur - *str);
378
2.16k
      else
379
2.16k
    uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
380
2.16k
  }
381
2.16k
  *str = cur;
382
2.16k
  return(0);
383
2.16k
    }
384
28.4k
    return(1);
385
30.5k
}
386
387
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. A character is only inspected once the previous one
 * was found to be a digit, so the string terminator is never passed.
 * Values 256-259 are rejected: the third digit of a "25x" octet has to
 * be in the range 0-5.
 *
 * Returns 0 if found and skipped, 1 otherwise (@str is then unchanged)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* DIGIT */
        len = 1;
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* %x31-39 DIGIT: two digits must not start with a zero */
        if (cur[0] == '0')
            return(1);
        len = 2;
    } else if (cur[0] == '1') {
        /* "1" 2DIGIT */
        len = 3;
    } else if ((cur[0] == '2') &&
               ((cur[1] <= '4') ||
                ((cur[1] == '5') && (cur[2] <= '5')))) {
        /* "2" %x30-34 DIGIT  or  "25" %x30-35 */
        len = 3;
    } else {
        return(1);
    }

    *str = cur + len;
    return(0);
}
424
/**
425
 * xmlParse3986Host:
426
 * @uri:  pointer to an URI structure
427
 * @str:  the string to analyze
428
 *
429
 * Parse an host part and fills in the appropriate fields
430
 * of the @uri structure
431
 *
432
 * host          = IP-literal / IPv4address / reg-name
433
 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
434
 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
435
 * reg-name      = *( unreserved / pct-encoded / sub-delims )
436
 *
437
 * Returns 0 or the error code
438
 */
439
static int
440
xmlParse3986Host(xmlURIPtr uri, const char **str)
441
30.5k
{
442
30.5k
    const char *cur = *str;
443
30.5k
    const char *host;
444
445
30.5k
    host = cur;
446
    /*
447
     * IPv6 and future addressing scheme are enclosed between brackets
448
     */
449
30.5k
    if (*cur == '[') {
450
1.17k
        cur++;
451
37.3M
  while ((*cur != ']') && (*cur != 0))
452
37.3M
      cur++;
453
1.17k
  if (*cur != ']')
454
602
      return(1);
455
573
  cur++;
456
573
  goto found;
457
1.17k
    }
458
    /*
459
     * try to parse an IPv4
460
     */
461
29.4k
    if (ISA_DIGIT(cur)) {
462
6.18k
        if (xmlParse3986DecOctet(&cur) != 0)
463
2.00k
      goto not_ipv4;
464
4.18k
  if (*cur != '.')
465
3.72k
      goto not_ipv4;
466
452
  cur++;
467
452
        if (xmlParse3986DecOctet(&cur) != 0)
468
105
      goto not_ipv4;
469
347
  if (*cur != '.')
470
226
      goto not_ipv4;
471
121
        if (xmlParse3986DecOctet(&cur) != 0)
472
121
      goto not_ipv4;
473
0
  if (*cur != '.')
474
0
      goto not_ipv4;
475
0
        if (xmlParse3986DecOctet(&cur) != 0)
476
0
      goto not_ipv4;
477
0
  goto found;
478
6.18k
not_ipv4:
479
6.18k
        cur = *str;
480
6.18k
    }
481
    /*
482
     * then this should be a hostname which can be empty
483
     */
484
152k
    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
485
122k
        NEXT(cur);
486
29.9k
found:
487
29.9k
    if (uri != NULL) {
488
29.9k
  if (uri->authority != NULL) xmlFree(uri->authority);
489
29.9k
  uri->authority = NULL;
490
29.9k
  if (uri->server != NULL) xmlFree(uri->server);
491
29.9k
  if (cur != host) {
492
16.9k
      if (uri->cleanup & 2)
493
0
    uri->server = STRNDUP(host, cur - host);
494
16.9k
      else
495
16.9k
    uri->server = xmlURIUnescapeString(host, cur - host, NULL);
496
16.9k
  } else
497
13.0k
      uri->server = NULL;
498
29.9k
    }
499
29.9k
    *str = cur;
500
29.9k
    return(0);
501
29.4k
}
502
503
/**
504
 * xmlParse3986Authority:
505
 * @uri:  pointer to an URI structure
506
 * @str:  the string to analyze
507
 *
508
 * Parse an authority part and fills in the appropriate fields
509
 * of the @uri structure
510
 *
511
 * authority     = [ userinfo "@" ] host [ ":" port ]
512
 *
513
 * Returns 0 or the error code
514
 */
515
static int
516
xmlParse3986Authority(xmlURIPtr uri, const char **str)
517
30.5k
{
518
30.5k
    const char *cur;
519
30.5k
    int ret;
520
521
30.5k
    cur = *str;
522
    /*
523
     * try to parse an userinfo and check for the trailing @
524
     */
525
30.5k
    ret = xmlParse3986Userinfo(uri, &cur);
526
30.5k
    if ((ret != 0) || (*cur != '@'))
527
28.4k
        cur = *str;
528
2.16k
    else
529
2.16k
        cur++;
530
30.5k
    ret = xmlParse3986Host(uri, &cur);
531
30.5k
    if (ret != 0) return(ret);
532
29.9k
    if (*cur == ':') {
533
5.90k
        cur++;
534
5.90k
        ret = xmlParse3986Port(uri, &cur);
535
5.90k
  if (ret != 0) return(ret);
536
5.90k
    }
537
26.4k
    *str = cur;
538
26.4k
    return(0);
539
29.9k
}
540
541
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character
 * @empty: allow an empty segment
 *
 * Skip a path segment; nothing is stored, only @str is advanced.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Scanning stops at the first character which is not a pchar or which
 * equals @forbid.
 *
 * Returns 0 on success, 1 if the first character is not a pchar and
 * @empty is zero
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur = *str;

    if (!ISA_PCHAR(cur))
        return(empty ? 0 : 1);

    for (;;) {
        if (!ISA_PCHAR(cur) || (*cur == forbid))
            break;
        NEXT(cur);
    }
    *str = cur;
    return (0);
}
573
574
/**
575
 * xmlParse3986PathAbEmpty:
576
 * @uri:  pointer to an URI structure
577
 * @str:  the string to analyze
578
 *
579
 * Parse an path absolute or empty and fills in the appropriate fields
580
 * of the @uri structure
581
 *
582
 * path-abempty  = *( "/" segment )
583
 *
584
 * Returns 0 or the error code
585
 */
586
static int
587
xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
588
26.4k
{
589
26.4k
    const char *cur;
590
26.4k
    int ret;
591
592
26.4k
    cur = *str;
593
594
39.3k
    while (*cur == '/') {
595
12.9k
        cur++;
596
12.9k
  ret = xmlParse3986Segment(&cur, 0, 1);
597
12.9k
  if (ret != 0) return(ret);
598
12.9k
    }
599
26.4k
    if (uri != NULL) {
600
26.4k
  if (uri->path != NULL) xmlFree(uri->path);
601
26.4k
        if (*str != cur) {
602
4.08k
            if (uri->cleanup & 2)
603
0
                uri->path = STRNDUP(*str, cur - *str);
604
4.08k
            else
605
4.08k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
606
22.3k
        } else {
607
22.3k
            uri->path = NULL;
608
22.3k
        }
609
26.4k
    }
610
26.4k
    *str = cur;
611
26.4k
    return (0);
612
26.4k
}
613
614
/**
615
 * xmlParse3986PathAbsolute:
616
 * @uri:  pointer to an URI structure
617
 * @str:  the string to analyze
618
 *
619
 * Parse an path absolute and fills in the appropriate fields
620
 * of the @uri structure
621
 *
622
 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
623
 *
624
 * Returns 0 or the error code
625
 */
626
static int
627
xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
628
6.50k
{
629
6.50k
    const char *cur;
630
6.50k
    int ret;
631
632
6.50k
    cur = *str;
633
634
6.50k
    if (*cur != '/')
635
0
        return(1);
636
6.50k
    cur++;
637
6.50k
    ret = xmlParse3986Segment(&cur, 0, 0);
638
6.50k
    if (ret == 0) {
639
5.66k
  while (*cur == '/') {
640
1.84k
      cur++;
641
1.84k
      ret = xmlParse3986Segment(&cur, 0, 1);
642
1.84k
      if (ret != 0) return(ret);
643
1.84k
  }
644
3.82k
    }
645
6.50k
    if (uri != NULL) {
646
6.50k
  if (uri->path != NULL) xmlFree(uri->path);
647
6.50k
        if (cur != *str) {
648
6.50k
            if (uri->cleanup & 2)
649
0
                uri->path = STRNDUP(*str, cur - *str);
650
6.50k
            else
651
6.50k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
652
6.50k
        } else {
653
0
            uri->path = NULL;
654
0
        }
655
6.50k
    }
656
6.50k
    *str = cur;
657
6.50k
    return (0);
658
6.50k
}
659
660
/**
661
 * xmlParse3986PathRootless:
662
 * @uri:  pointer to an URI structure
663
 * @str:  the string to analyze
664
 *
665
 * Parse an path without root and fills in the appropriate fields
666
 * of the @uri structure
667
 *
668
 * path-rootless = segment-nz *( "/" segment )
669
 *
670
 * Returns 0 or the error code
671
 */
672
static int
673
xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
674
14.9k
{
675
14.9k
    const char *cur;
676
14.9k
    int ret;
677
678
14.9k
    cur = *str;
679
680
14.9k
    ret = xmlParse3986Segment(&cur, 0, 0);
681
14.9k
    if (ret != 0) return(ret);
682
20.0k
    while (*cur == '/') {
683
5.02k
        cur++;
684
5.02k
  ret = xmlParse3986Segment(&cur, 0, 1);
685
5.02k
  if (ret != 0) return(ret);
686
5.02k
    }
687
14.9k
    if (uri != NULL) {
688
14.9k
  if (uri->path != NULL) xmlFree(uri->path);
689
14.9k
        if (cur != *str) {
690
14.9k
            if (uri->cleanup & 2)
691
0
                uri->path = STRNDUP(*str, cur - *str);
692
14.9k
            else
693
14.9k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
694
14.9k
        } else {
695
0
            uri->path = NULL;
696
0
        }
697
14.9k
    }
698
14.9k
    *str = cur;
699
14.9k
    return (0);
700
14.9k
}
701
702
/**
703
 * xmlParse3986PathNoScheme:
704
 * @uri:  pointer to an URI structure
705
 * @str:  the string to analyze
706
 *
707
 * Parse an path which is not a scheme and fills in the appropriate fields
708
 * of the @uri structure
709
 *
710
 * path-noscheme = segment-nz-nc *( "/" segment )
711
 *
712
 * Returns 0 or the error code
713
 */
714
static int
715
xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
716
90.2k
{
717
90.2k
    const char *cur;
718
90.2k
    int ret;
719
720
90.2k
    cur = *str;
721
722
90.2k
    ret = xmlParse3986Segment(&cur, ':', 0);
723
90.2k
    if (ret != 0) return(ret);
724
136k
    while (*cur == '/') {
725
46.7k
        cur++;
726
46.7k
  ret = xmlParse3986Segment(&cur, 0, 1);
727
46.7k
  if (ret != 0) return(ret);
728
46.7k
    }
729
90.2k
    if (uri != NULL) {
730
90.2k
  if (uri->path != NULL) xmlFree(uri->path);
731
90.2k
        if (cur != *str) {
732
88.5k
            if (uri->cleanup & 2)
733
0
                uri->path = STRNDUP(*str, cur - *str);
734
88.5k
            else
735
88.5k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
736
88.5k
        } else {
737
1.64k
            uri->path = NULL;
738
1.64k
        }
739
90.2k
    }
740
90.2k
    *str = cur;
741
90.2k
    return (0);
742
90.2k
}
743
744
/**
745
 * xmlParse3986HierPart:
746
 * @uri:  pointer to an URI structure
747
 * @str:  the string to analyze
748
 *
749
 * Parse an hierarchical part and fills in the appropriate fields
750
 * of the @uri structure
751
 *
752
 * hier-part     = "//" authority path-abempty
753
 *                / path-absolute
754
 *                / path-rootless
755
 *                / path-empty
756
 *
757
 * Returns 0 or the error code
758
 */
759
static int
760
xmlParse3986HierPart(xmlURIPtr uri, const char **str)
761
42.1k
{
762
42.1k
    const char *cur;
763
42.1k
    int ret;
764
765
42.1k
    cur = *str;
766
767
42.1k
    if ((*cur == '/') && (*(cur + 1) == '/')) {
768
17.2k
        cur += 2;
769
17.2k
  ret = xmlParse3986Authority(uri, &cur);
770
17.2k
  if (ret != 0) return(ret);
771
14.3k
  if (uri->server == NULL)
772
2.83k
      uri->port = -1;
773
14.3k
  ret = xmlParse3986PathAbEmpty(uri, &cur);
774
14.3k
  if (ret != 0) return(ret);
775
14.3k
  *str = cur;
776
14.3k
  return(0);
777
24.8k
    } else if (*cur == '/') {
778
4.32k
        ret = xmlParse3986PathAbsolute(uri, &cur);
779
4.32k
  if (ret != 0) return(ret);
780
20.5k
    } else if (ISA_PCHAR(cur)) {
781
14.9k
        ret = xmlParse3986PathRootless(uri, &cur);
782
14.9k
  if (ret != 0) return(ret);
783
14.9k
    } else {
784
  /* path-empty is effectively empty */
785
5.56k
  if (uri != NULL) {
786
5.56k
      if (uri->path != NULL) xmlFree(uri->path);
787
5.56k
      uri->path = NULL;
788
5.56k
  }
789
5.56k
    }
790
24.8k
    *str = cur;
791
24.8k
    return (0);
792
42.1k
}
793
794
/**
795
 * xmlParse3986RelativeRef:
796
 * @uri:  pointer to an URI structure
797
 * @str:  the string to analyze
798
 *
799
 * Parse an URI string and fills in the appropriate fields
800
 * of the @uri structure
801
 *
802
 * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
803
 * relative-part = "//" authority path-abempty
804
 *               / path-absolute
805
 *               / path-noscheme
806
 *               / path-empty
807
 *
808
 * Returns 0 or the error code
809
 */
810
static int
811
122k
xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
812
122k
    int ret;
813
814
122k
    if ((*str == '/') && (*(str + 1) == '/')) {
815
13.3k
        str += 2;
816
13.3k
  ret = xmlParse3986Authority(uri, &str);
817
13.3k
  if (ret != 0) return(ret);
818
12.0k
  ret = xmlParse3986PathAbEmpty(uri, &str);
819
12.0k
  if (ret != 0) return(ret);
820
109k
    } else if (*str == '/') {
821
2.18k
  ret = xmlParse3986PathAbsolute(uri, &str);
822
2.18k
  if (ret != 0) return(ret);
823
106k
    } else if (ISA_PCHAR(str)) {
824
90.2k
        ret = xmlParse3986PathNoScheme(uri, &str);
825
90.2k
  if (ret != 0) return(ret);
826
90.2k
    } else {
827
  /* path-empty is effectively empty */
828
16.6k
  if (uri != NULL) {
829
16.6k
      if (uri->path != NULL) xmlFree(uri->path);
830
16.6k
      uri->path = NULL;
831
16.6k
  }
832
16.6k
    }
833
834
121k
    if (*str == '?') {
835
8.60k
  str++;
836
8.60k
  ret = xmlParse3986Query(uri, &str);
837
8.60k
  if (ret != 0) return(ret);
838
8.60k
    }
839
121k
    if (*str == '#') {
840
11.6k
  str++;
841
11.6k
  ret = xmlParse3986Fragment(uri, &str);
842
11.6k
  if (ret != 0) return(ret);
843
11.6k
    }
844
121k
    if (*str != 0) {
845
76.3k
  xmlCleanURI(uri);
846
76.3k
  return(1);
847
76.3k
    }
848
44.7k
    return(0);
849
121k
}
850
851
852
/**
853
 * xmlParse3986URI:
854
 * @uri:  pointer to an URI structure
855
 * @str:  the string to analyze
856
 *
857
 * Parse an URI string and fills in the appropriate fields
858
 * of the @uri structure
859
 *
860
 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
861
 *
862
 * Returns 0 or the error code
863
 */
864
static int
865
141k
xmlParse3986URI(xmlURIPtr uri, const char *str) {
866
141k
    int ret;
867
868
141k
    ret = xmlParse3986Scheme(uri, &str);
869
141k
    if (ret != 0) return(ret);
870
65.8k
    if (*str != ':') {
871
23.7k
  return(1);
872
23.7k
    }
873
42.1k
    str++;
874
42.1k
    ret = xmlParse3986HierPart(uri, &str);
875
42.1k
    if (ret != 0) return(ret);
876
39.2k
    if (*str == '?') {
877
2.84k
  str++;
878
2.84k
  ret = xmlParse3986Query(uri, &str);
879
2.84k
  if (ret != 0) return(ret);
880
2.84k
    }
881
39.2k
    if (*str == '#') {
882
5.83k
  str++;
883
5.83k
  ret = xmlParse3986Fragment(uri, &str);
884
5.83k
  if (ret != 0) return(ret);
885
5.83k
    }
886
39.2k
    if (*str != 0) {
887
20.3k
  xmlCleanURI(uri);
888
20.3k
  return(1);
889
20.3k
    }
890
18.8k
    return(0);
891
39.2k
}
892
893
/**
894
 * xmlParse3986URIReference:
895
 * @uri:  pointer to an URI structure
896
 * @str:  the string to analyze
897
 *
898
 * Parse an URI reference string and fills in the appropriate fields
899
 * of the @uri structure
900
 *
901
 * URI-reference = URI / relative-ref
902
 *
903
 * Returns 0 or the error code
904
 */
905
static int
906
141k
xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
907
141k
    int ret;
908
909
141k
    if (str == NULL)
910
0
  return(-1);
911
141k
    xmlCleanURI(uri);
912
913
    /*
914
     * Try first to parse absolute refs, then fallback to relative if
915
     * it fails.
916
     */
917
141k
    ret = xmlParse3986URI(uri, str);
918
141k
    if (ret != 0) {
919
122k
  xmlCleanURI(uri);
920
122k
        ret = xmlParse3986RelativeRef(uri, str);
921
122k
  if (ret != 0) {
922
77.5k
      xmlCleanURI(uri);
923
77.5k
      return(ret);
924
77.5k
  }
925
122k
    }
926
63.5k
    return(0);
927
141k
}
928
929
/**
930
 * xmlParseURI:
931
 * @str:  the URI string to analyze
932
 *
933
 * Parse an URI based on RFC 3986
934
 *
935
 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
936
 *
937
 * Returns a newly built xmlURIPtr or NULL in case of error
938
 */
939
xmlURIPtr
940
140k
xmlParseURI(const char *str) {
941
140k
    xmlURIPtr uri;
942
140k
    int ret;
943
944
140k
    if (str == NULL)
945
0
  return(NULL);
946
140k
    uri = xmlCreateURI();
947
140k
    if (uri != NULL) {
948
140k
  ret = xmlParse3986URIReference(uri, str);
949
140k
        if (ret) {
950
77.4k
      xmlFreeURI(uri);
951
77.4k
      return(NULL);
952
77.4k
  }
953
140k
    }
954
62.7k
    return(uri);
955
140k
}
956
957
/**
958
 * xmlParseURIReference:
959
 * @uri:  pointer to an URI structure
960
 * @str:  the string to analyze
961
 *
962
 * Parse an URI reference string based on RFC 3986 and fills in the
963
 * appropriate fields of the @uri structure
964
 *
965
 * URI-reference = URI / relative-ref
966
 *
967
 * Returns 0 or the error code
968
 */
969
int
970
971
xmlParseURIReference(xmlURIPtr uri, const char *str) {
971
971
    return(xmlParse3986URIReference(uri, str));
972
971
}
973
974
/**
975
 * xmlParseURIRaw:
976
 * @str:  the URI string to analyze
977
 * @raw:  if 1 unescaping of URI pieces are disabled
978
 *
979
 * Parse an URI but allows to keep intact the original fragments.
980
 *
981
 * URI-reference = URI / relative-ref
982
 *
983
 * Returns a newly built xmlURIPtr or NULL in case of error
984
 */
985
xmlURIPtr
986
0
xmlParseURIRaw(const char *str, int raw) {
987
0
    xmlURIPtr uri;
988
0
    int ret;
989
990
0
    if (str == NULL)
991
0
  return(NULL);
992
0
    uri = xmlCreateURI();
993
0
    if (uri != NULL) {
994
0
        if (raw) {
995
0
      uri->cleanup |= 2;
996
0
  }
997
0
  ret = xmlParseURIReference(uri, str);
998
0
        if (ret) {
999
0
      xmlFreeURI(uri);
1000
0
      return(NULL);
1001
0
  }
1002
0
    }
1003
0
    return(uri);
1004
0
}
1005
1006
/************************************************************************
1007
 *                  *
1008
 *      Generic URI structure functions     *
1009
 *                  *
1010
 ************************************************************************/
1011
1012
/**
1013
 * xmlCreateURI:
1014
 *
1015
 * Simply creates an empty xmlURI
1016
 *
1017
 * Returns the new structure or NULL in case of error
1018
 */
1019
xmlURIPtr
1020
141k
xmlCreateURI(void) {
1021
141k
    xmlURIPtr ret;
1022
1023
141k
    ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1024
141k
    if (ret == NULL) {
1025
0
        xmlURIErrMemory("creating URI structure\n");
1026
0
  return(NULL);
1027
0
    }
1028
141k
    memset(ret, 0, sizeof(xmlURI));
1029
141k
    return(ret);
1030
141k
}
1031
1032
/**
1033
 * xmlSaveUriRealloc:
1034
 *
1035
 * Function to handle properly a reallocation when saving an URI
1036
 * Also imposes some limit on the length of an URI string output
1037
 */
1038
static xmlChar *
1039
1.15k
xmlSaveUriRealloc(xmlChar *ret, int *max) {
1040
1.15k
    xmlChar *temp;
1041
1.15k
    int tmp;
1042
1043
1.15k
    if (*max > MAX_URI_LENGTH) {
1044
9
        xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1045
9
        return(NULL);
1046
9
    }
1047
1.14k
    tmp = *max * 2;
1048
1.14k
    temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1049
1.14k
    if (temp == NULL) {
1050
0
        xmlURIErrMemory("saving URI\n");
1051
0
        return(NULL);
1052
0
    }
1053
1.14k
    *max = tmp;
1054
1.14k
    return(temp);
1055
1.14k
}
1056
1057
/**
1058
 * xmlSaveUri:
1059
 * @uri:  pointer to an xmlURI
1060
 *
1061
 * Save the URI as an escaped string
1062
 *
1063
 * Returns a new string (to be deallocated by caller)
1064
 */
1065
xmlChar *
1066
842
xmlSaveUri(xmlURIPtr uri) {
1067
842
    xmlChar *ret = NULL;
1068
842
    xmlChar *temp;
1069
842
    const char *p;
1070
842
    int len;
1071
842
    int max;
1072
1073
842
    if (uri == NULL) return(NULL);
1074
1075
1076
842
    max = 80;
1077
842
    ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1078
842
    if (ret == NULL) {
1079
0
        xmlURIErrMemory("saving URI\n");
1080
0
  return(NULL);
1081
0
    }
1082
842
    len = 0;
1083
1084
842
    if (uri->scheme != NULL) {
1085
0
  p = uri->scheme;
1086
0
  while (*p != 0) {
1087
0
      if (len >= max) {
1088
0
                temp = xmlSaveUriRealloc(ret, &max);
1089
0
                if (temp == NULL) goto mem_error;
1090
0
    ret = temp;
1091
0
      }
1092
0
      ret[len++] = *p++;
1093
0
  }
1094
0
  if (len >= max) {
1095
0
            temp = xmlSaveUriRealloc(ret, &max);
1096
0
            if (temp == NULL) goto mem_error;
1097
0
            ret = temp;
1098
0
  }
1099
0
  ret[len++] = ':';
1100
0
    }
1101
842
    if (uri->opaque != NULL) {
1102
0
  p = uri->opaque;
1103
0
  while (*p != 0) {
1104
0
      if (len + 3 >= max) {
1105
0
                temp = xmlSaveUriRealloc(ret, &max);
1106
0
                if (temp == NULL) goto mem_error;
1107
0
                ret = temp;
1108
0
      }
1109
0
      if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1110
0
    ret[len++] = *p++;
1111
0
      else {
1112
0
    int val = *(unsigned char *)p++;
1113
0
    int hi = val / 0x10, lo = val % 0x10;
1114
0
    ret[len++] = '%';
1115
0
    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1116
0
    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1117
0
      }
1118
0
  }
1119
842
    } else {
1120
842
  if ((uri->server != NULL) || (uri->port == -1)) {
1121
549
      if (len + 3 >= max) {
1122
0
                temp = xmlSaveUriRealloc(ret, &max);
1123
0
                if (temp == NULL) goto mem_error;
1124
0
                ret = temp;
1125
0
      }
1126
549
      ret[len++] = '/';
1127
549
      ret[len++] = '/';
1128
549
      if (uri->user != NULL) {
1129
427
    p = uri->user;
1130
6.77M
    while (*p != 0) {
1131
6.77M
        if (len + 3 >= max) {
1132
884
                        temp = xmlSaveUriRealloc(ret, &max);
1133
884
                        if (temp == NULL) goto mem_error;
1134
878
                        ret = temp;
1135
878
        }
1136
6.77M
        if ((IS_UNRESERVED(*(p))) ||
1137
6.77M
      ((*(p) == ';')) || ((*(p) == ':')) ||
1138
6.77M
      ((*(p) == '&')) || ((*(p) == '=')) ||
1139
6.77M
      ((*(p) == '+')) || ((*(p) == '$')) ||
1140
6.77M
      ((*(p) == ',')))
1141
17.2k
      ret[len++] = *p++;
1142
6.75M
        else {
1143
6.75M
      int val = *(unsigned char *)p++;
1144
6.75M
      int hi = val / 0x10, lo = val % 0x10;
1145
6.75M
      ret[len++] = '%';
1146
6.75M
      ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1147
6.75M
      ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1148
6.75M
        }
1149
6.77M
    }
1150
421
    if (len + 3 >= max) {
1151
10
                    temp = xmlSaveUriRealloc(ret, &max);
1152
10
                    if (temp == NULL) goto mem_error;
1153
10
                    ret = temp;
1154
10
    }
1155
421
    ret[len++] = '@';
1156
421
      }
1157
543
      if (uri->server != NULL) {
1158
543
    p = uri->server;
1159
4.15M
    while (*p != 0) {
1160
4.15M
        if (len >= max) {
1161
79
      temp = xmlSaveUriRealloc(ret, &max);
1162
79
      if (temp == NULL) goto mem_error;
1163
78
      ret = temp;
1164
78
        }
1165
4.15M
        ret[len++] = *p++;
1166
4.15M
    }
1167
542
    if (uri->port > 0) {
1168
37
        if (len + 10 >= max) {
1169
12
      temp = xmlSaveUriRealloc(ret, &max);
1170
12
      if (temp == NULL) goto mem_error;
1171
12
      ret = temp;
1172
12
        }
1173
37
        len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1174
37
    }
1175
542
      }
1176
543
  } else if (uri->authority != NULL) {
1177
0
      if (len + 3 >= max) {
1178
0
                temp = xmlSaveUriRealloc(ret, &max);
1179
0
                if (temp == NULL) goto mem_error;
1180
0
                ret = temp;
1181
0
      }
1182
0
      ret[len++] = '/';
1183
0
      ret[len++] = '/';
1184
0
      p = uri->authority;
1185
0
      while (*p != 0) {
1186
0
    if (len + 3 >= max) {
1187
0
                    temp = xmlSaveUriRealloc(ret, &max);
1188
0
                    if (temp == NULL) goto mem_error;
1189
0
                    ret = temp;
1190
0
    }
1191
0
    if ((IS_UNRESERVED(*(p))) ||
1192
0
                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1193
0
                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1194
0
                    ((*(p) == '=')) || ((*(p) == '+')))
1195
0
        ret[len++] = *p++;
1196
0
    else {
1197
0
        int val = *(unsigned char *)p++;
1198
0
        int hi = val / 0x10, lo = val % 0x10;
1199
0
        ret[len++] = '%';
1200
0
        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1201
0
        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1202
0
    }
1203
0
      }
1204
293
  } else if (uri->scheme != NULL) {
1205
0
      if (len + 3 >= max) {
1206
0
                temp = xmlSaveUriRealloc(ret, &max);
1207
0
                if (temp == NULL) goto mem_error;
1208
0
                ret = temp;
1209
0
      }
1210
0
  }
1211
835
  if (uri->path != NULL) {
1212
428
      p = uri->path;
1213
      /*
1214
       * the colon in file:///d: should not be escaped or
1215
       * Windows accesses fail later.
1216
       */
1217
428
      if ((uri->scheme != NULL) &&
1218
428
    (p[0] == '/') &&
1219
428
    (((p[1] >= 'a') && (p[1] <= 'z')) ||
1220
0
     ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1221
428
    (p[2] == ':') &&
1222
428
          (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1223
0
    if (len + 3 >= max) {
1224
0
                    temp = xmlSaveUriRealloc(ret, &max);
1225
0
                    if (temp == NULL) goto mem_error;
1226
0
                    ret = temp;
1227
0
    }
1228
0
    ret[len++] = *p++;
1229
0
    ret[len++] = *p++;
1230
0
    ret[len++] = *p++;
1231
0
      }
1232
13.3k
      while (*p != 0) {
1233
12.8k
    if (len + 3 >= max) {
1234
77
                    temp = xmlSaveUriRealloc(ret, &max);
1235
77
                    if (temp == NULL) goto mem_error;
1236
77
                    ret = temp;
1237
77
    }
1238
12.8k
    if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1239
12.8k
                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1240
12.8k
              ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1241
12.8k
              ((*(p) == ',')))
1242
11.9k
        ret[len++] = *p++;
1243
939
    else {
1244
939
        int val = *(unsigned char *)p++;
1245
939
        int hi = val / 0x10, lo = val % 0x10;
1246
939
        ret[len++] = '%';
1247
939
        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1248
939
        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1249
939
    }
1250
12.8k
      }
1251
428
  }
1252
835
  if (uri->query_raw != NULL) {
1253
249
      if (len + 1 >= max) {
1254
12
                temp = xmlSaveUriRealloc(ret, &max);
1255
12
                if (temp == NULL) goto mem_error;
1256
11
                ret = temp;
1257
11
      }
1258
248
      ret[len++] = '?';
1259
248
      p = uri->query_raw;
1260
8.18k
      while (*p != 0) {
1261
7.93k
    if (len + 1 >= max) {
1262
67
                    temp = xmlSaveUriRealloc(ret, &max);
1263
67
                    if (temp == NULL) goto mem_error;
1264
66
                    ret = temp;
1265
66
    }
1266
7.93k
    ret[len++] = *p++;
1267
7.93k
      }
1268
586
  } else if (uri->query != NULL) {
1269
0
      if (len + 3 >= max) {
1270
0
                temp = xmlSaveUriRealloc(ret, &max);
1271
0
                if (temp == NULL) goto mem_error;
1272
0
                ret = temp;
1273
0
      }
1274
0
      ret[len++] = '?';
1275
0
      p = uri->query;
1276
0
      while (*p != 0) {
1277
0
    if (len + 3 >= max) {
1278
0
                    temp = xmlSaveUriRealloc(ret, &max);
1279
0
                    if (temp == NULL) goto mem_error;
1280
0
                    ret = temp;
1281
0
    }
1282
0
    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1283
0
        ret[len++] = *p++;
1284
0
    else {
1285
0
        int val = *(unsigned char *)p++;
1286
0
        int hi = val / 0x10, lo = val % 0x10;
1287
0
        ret[len++] = '%';
1288
0
        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1289
0
        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1290
0
    }
1291
0
      }
1292
0
  }
1293
835
    }
1294
833
    if (uri->fragment != NULL) {
1295
0
  if (len + 3 >= max) {
1296
0
            temp = xmlSaveUriRealloc(ret, &max);
1297
0
            if (temp == NULL) goto mem_error;
1298
0
            ret = temp;
1299
0
  }
1300
0
  ret[len++] = '#';
1301
0
  p = uri->fragment;
1302
0
  while (*p != 0) {
1303
0
      if (len + 3 >= max) {
1304
0
                temp = xmlSaveUriRealloc(ret, &max);
1305
0
                if (temp == NULL) goto mem_error;
1306
0
                ret = temp;
1307
0
      }
1308
0
      if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1309
0
    ret[len++] = *p++;
1310
0
      else {
1311
0
    int val = *(unsigned char *)p++;
1312
0
    int hi = val / 0x10, lo = val % 0x10;
1313
0
    ret[len++] = '%';
1314
0
    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1315
0
    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1316
0
      }
1317
0
  }
1318
0
    }
1319
833
    if (len >= max) {
1320
13
        temp = xmlSaveUriRealloc(ret, &max);
1321
13
        if (temp == NULL) goto mem_error;
1322
13
        ret = temp;
1323
13
    }
1324
833
    ret[len] = 0;
1325
833
    return(ret);
1326
1327
9
mem_error:
1328
9
    xmlFree(ret);
1329
9
    return(NULL);
1330
833
}
1331
1332
/**
1333
 * xmlPrintURI:
1334
 * @stream:  a FILE* for the output
1335
 * @uri:  pointer to an xmlURI
1336
 *
1337
 * Prints the URI in the stream @stream.
1338
 */
1339
void
1340
0
xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1341
0
    xmlChar *out;
1342
1343
0
    out = xmlSaveUri(uri);
1344
0
    if (out != NULL) {
1345
0
  fprintf(stream, "%s", (char *) out);
1346
0
  xmlFree(out);
1347
0
    }
1348
0
}
1349
1350
/**
1351
 * xmlCleanURI:
1352
 * @uri:  pointer to an xmlURI
1353
 *
1354
 * Make sure the xmlURI struct is free of content
1355
 */
1356
static void
1357
437k
xmlCleanURI(xmlURIPtr uri) {
1358
437k
    if (uri == NULL) return;
1359
1360
437k
    if (uri->scheme != NULL) xmlFree(uri->scheme);
1361
437k
    uri->scheme = NULL;
1362
437k
    if (uri->server != NULL) xmlFree(uri->server);
1363
437k
    uri->server = NULL;
1364
437k
    if (uri->user != NULL) xmlFree(uri->user);
1365
437k
    uri->user = NULL;
1366
437k
    if (uri->path != NULL) xmlFree(uri->path);
1367
437k
    uri->path = NULL;
1368
437k
    if (uri->fragment != NULL) xmlFree(uri->fragment);
1369
437k
    uri->fragment = NULL;
1370
437k
    if (uri->opaque != NULL) xmlFree(uri->opaque);
1371
437k
    uri->opaque = NULL;
1372
437k
    if (uri->authority != NULL) xmlFree(uri->authority);
1373
437k
    uri->authority = NULL;
1374
437k
    if (uri->query != NULL) xmlFree(uri->query);
1375
437k
    uri->query = NULL;
1376
437k
    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1377
437k
    uri->query_raw = NULL;
1378
437k
}
1379
1380
/**
1381
 * xmlFreeURI:
1382
 * @uri:  pointer to an xmlURI
1383
 *
1384
 * Free up the xmlURI struct
1385
 */
1386
void
1387
141k
xmlFreeURI(xmlURIPtr uri) {
1388
141k
    if (uri == NULL) return;
1389
1390
141k
    if (uri->scheme != NULL) xmlFree(uri->scheme);
1391
141k
    if (uri->server != NULL) xmlFree(uri->server);
1392
141k
    if (uri->user != NULL) xmlFree(uri->user);
1393
141k
    if (uri->path != NULL) xmlFree(uri->path);
1394
141k
    if (uri->fragment != NULL) xmlFree(uri->fragment);
1395
141k
    if (uri->opaque != NULL) xmlFree(uri->opaque);
1396
141k
    if (uri->authority != NULL) xmlFree(uri->authority);
1397
141k
    if (uri->query != NULL) xmlFree(uri->query);
1398
141k
    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1399
141k
    xmlFree(uri);
1400
141k
}
1401
1402
/************************************************************************
1403
 *                  *
1404
 *      Helper functions        *
1405
 *                  *
1406
 ************************************************************************/
1407
1408
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of '/' between segments are
 * collapsed to a single '/' along the way.
 *
 * The string is rewritten in place and can only get shorter; no memory
 * is allocated.
 *
 * Returns 0 on success or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *rd, *wr;

    if (path == NULL)
        return(-1);

    /*
     * Leading '/' characters are left untouched: start at the first
     * non-empty segment.
     */
    for (rd = path; *rd == '/'; rd++)
        ;
    if (*rd == '\0')
        return(0);

    /* Everything before this point is kept as is. */
    wr = rd;

    /*
     * First pass, steps (c) and (d). At the top of each iteration "rd"
     * is at the start of a segment.
     */
    while (*rd != '\0') {
        if (*rd == '.') {
            if (rd[1] == '/') {
                /*
                 * c) "./" where "." is a complete segment is dropped,
                 *    together with any '/' that directly follows it.
                 */
                rd += 2;
                while (*rd == '/')
                    rd++;
                continue;
            }
            /* d) a trailing "." segment is dropped. */
            if (rd[1] == '\0')
                break;
        }

        /* Any other segment is copied through. */
        while ((*rd != '/') && (*rd != '\0'))
            *wr++ = *rd++;
        if (*rd == '\0')
            break;

        /* Keep only the last '/' of a run of separators. */
        while (rd[1] == '/')
            rd++;
        *wr++ = *rd++;
    }
    *wr = '\0';

    /* Second pass starts again at the first non-empty segment. */
    for (rd = path; *rd == '/'; rd++)
        ;
    if (*rd == '\0')
        return(0);

    /*
     * Steps (e) and (f): remove "<segment>/../" and a trailing
     * "<segment>/..", where <segment> is not "..". The buffer is
     * compacted at every removal and the scan backs up one segment, so
     * that "foo/bar/../.." collapses completely.
     */
    for (;;) {
        char *next, *back;
        int curIsDotDot, nextIsDotDot;

        /* "rd" is at the first character of the segment to examine. */
        next = rd;
        while ((*next != '/') && (*next != '\0'))
            next++;

        /* A lone last segment cannot match (e) or (f). */
        if (*next == '\0')
            break;
        next++;

        curIsDotDot = ((rd[0] == '.') && (rd[1] == '.') &&
                       (next == rd + 3));
        nextIsDotDot = ((next[0] == '.') && (next[1] == '.') &&
                        ((next[2] == '/') || (next[2] == '\0')));

        /* Keep this segment unless it is a non-".." followed by "..". */
        if (curIsDotDot || !nextIsDotDot) {
            rd = next;
            continue;
        }

        /* f) "<segment>/.." at the very end: cut the string here. */
        if (next[2] == '\0') {
            *rd = '\0';
            break;
        }

        /* e) slide the tail over "<segment>/../"; the regions overlap. */
        memmove(rd, next + 3, strlen(next + 3) + 1);

        /* Without a previous segment, rescan from the same position. */
        back = rd;
        while ((back > path) && ((--back)[0] == '/'))
            ;
        if (back == path)
            continue;

        /* Otherwise restart at the beginning of the previous segment. */
        rd = back;
        while ((rd > path) && (rd[-1] != '/'))
            rd--;
    }
    /*
     * "wr" still marks the end of the first pass output; the second pass
     * only shortens the string, so this store lands at or after the
     * current terminator.
     */
    *wr = '\0';

    /*
     * g) Leading ".." segments that remain on a path starting with '/'
     *    are an error per the RFC; they are discarded here.
     */
    if (path[0] == '/') {
        rd = path;
        while ((rd[0] == '/') && (rd[1] == '.') && (rd[2] == '.') &&
               ((rd[3] == '/') || (rd[3] == '\0')))
            rd += 3;

        if (rd != path) {
            wr = path;
            while (*rd != '\0')
                *wr++ = *rd++;
            *wr = 0;
        }
    }

    return(0);
}
1596
1597
24.2k
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of 0-9, a-f or A-F, 0 otherwise
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1604
1605
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI.
 * Every "%XX" sequence made of two hexadecimal digits is replaced by the
 * byte it encodes (no character encoding is applied); anything else,
 * including a malformed or truncated escape, is copied unchanged.
 * The result is never longer than the input, so a caller supplied
 * @target needs room for @len bytes plus the terminating 0.
 *
 * Returns @target when it is provided, otherwise a newly allocated copy
 * of the unescaped string; NULL only in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *result, *dst;
    const char *src;
    int remaining;

    if (str == NULL)
        return(NULL);

    remaining = len;
    if (remaining <= 0)
        remaining = strlen(str);
    /* A length that does not fit an int shows up as negative here. */
    if (remaining < 0)
        return(NULL);

    if (target != NULL) {
        result = target;
    } else {
        result = (char *) xmlMallocAtomic(remaining + 1);
        if (result == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = result;
    while (remaining > 0) {
        if ((remaining > 2) && (src[0] == '%') &&
            (is_hex(src[1])) && (is_hex(src[2]))) {
            int value = 0;
            int i;

            /* Fold the two hexadecimal digits into one byte value. */
            for (i = 1; i <= 2; i++) {
                char digit = src[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += (digit - '0');
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else if ((digit >= 'A') && (digit <= 'F'))
                    value += (digit - 'A') + 10;
            }
            src += 3;
            remaining -= 3;
            *dst++ = (char) value;
        } else {
            *dst++ = *src++;
            remaining--;
        }
    }
    *dst = 0;
    return(result);
}
1667
1668
/**
1669
 * xmlURIEscapeStr:
1670
 * @str:  string to escape
1671
 * @list: exception list string of chars not to escape
1672
 *
1673
 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1674
 * and the characters in the exception list.
1675
 *
1676
 * Returns a new escaped string or NULL in case of error.
1677
 */
1678
xmlChar *
1679
0
xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1680
0
    xmlChar *ret, ch;
1681
0
    xmlChar *temp;
1682
0
    const xmlChar *in;
1683
0
    int len, out;
1684
1685
0
    if (str == NULL)
1686
0
  return(NULL);
1687
0
    if (str[0] == 0)
1688
0
  return(xmlStrdup(str));
1689
0
    len = xmlStrlen(str);
1690
0
    if (!(len > 0)) return(NULL);
1691
1692
0
    len += 20;
1693
0
    ret = (xmlChar *) xmlMallocAtomic(len);
1694
0
    if (ret == NULL) {
1695
0
        xmlURIErrMemory("escaping URI value\n");
1696
0
  return(NULL);
1697
0
    }
1698
0
    in = (const xmlChar *) str;
1699
0
    out = 0;
1700
0
    while(*in != 0) {
1701
0
  if (len - out <= 3) {
1702
0
            temp = xmlSaveUriRealloc(ret, &len);
1703
0
      if (temp == NULL) {
1704
0
                xmlURIErrMemory("escaping URI value\n");
1705
0
    xmlFree(ret);
1706
0
    return(NULL);
1707
0
      }
1708
0
      ret = temp;
1709
0
  }
1710
1711
0
  ch = *in;
1712
1713
0
  if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1714
0
      unsigned char val;
1715
0
      ret[out++] = '%';
1716
0
      val = ch >> 4;
1717
0
      if (val <= 9)
1718
0
    ret[out++] = '0' + val;
1719
0
      else
1720
0
    ret[out++] = 'A' + val - 0xA;
1721
0
      val = ch & 0xF;
1722
0
      if (val <= 9)
1723
0
    ret[out++] = '0' + val;
1724
0
      else
1725
0
    ret[out++] = 'A' + val - 0xA;
1726
0
      in++;
1727
0
  } else {
1728
0
      ret[out++] = *in++;
1729
0
  }
1730
1731
0
    }
1732
0
    ret[out] = 0;
1733
0
    return(ret);
1734
0
}
1735
1736
/**
1737
 * xmlURIEscape:
1738
 * @str:  the string of the URI to escape
1739
 *
1740
 * Escaping routine, does not do validity checks !
1741
 * It will try to escape the chars needing this, but this is heuristic
1742
 * based it's impossible to be sure.
1743
 *
1744
 * Returns an copy of the string, but escaped
1745
 *
1746
 * 25 May 2001
1747
 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1748
 * according to RFC2396.
1749
 *   - Carl Douglas
1750
 */
1751
xmlChar *
1752
xmlURIEscape(const xmlChar * str)
1753
0
{
1754
0
    xmlChar *ret, *segment = NULL;
1755
0
    xmlURIPtr uri;
1756
0
    int ret2;
1757
1758
0
    if (str == NULL)
1759
0
        return (NULL);
1760
1761
0
    uri = xmlCreateURI();
1762
0
    if (uri != NULL) {
1763
  /*
1764
   * Allow escaping errors in the unescaped form
1765
   */
1766
0
        uri->cleanup = 1;
1767
0
        ret2 = xmlParseURIReference(uri, (const char *)str);
1768
0
        if (ret2) {
1769
0
            xmlFreeURI(uri);
1770
0
            return (NULL);
1771
0
        }
1772
0
    }
1773
1774
0
    if (!uri)
1775
0
        return NULL;
1776
1777
0
    ret = NULL;
1778
1779
0
#define NULLCHK(p) if(!p) { \
1780
0
         xmlURIErrMemory("escaping URI value\n"); \
1781
0
         xmlFreeURI(uri); \
1782
0
         xmlFree(ret); \
1783
0
         return NULL; } \
1784
0
1785
0
    if (uri->scheme) {
1786
0
        segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1787
0
        NULLCHK(segment)
1788
0
        ret = xmlStrcat(ret, segment);
1789
0
        ret = xmlStrcat(ret, BAD_CAST ":");
1790
0
        xmlFree(segment);
1791
0
    }
1792
1793
0
    if (uri->authority) {
1794
0
        segment =
1795
0
            xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1796
0
        NULLCHK(segment)
1797
0
        ret = xmlStrcat(ret, BAD_CAST "//");
1798
0
        ret = xmlStrcat(ret, segment);
1799
0
        xmlFree(segment);
1800
0
    }
1801
1802
0
    if (uri->user) {
1803
0
        segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1804
0
        NULLCHK(segment)
1805
0
        ret = xmlStrcat(ret,BAD_CAST "//");
1806
0
        ret = xmlStrcat(ret, segment);
1807
0
        ret = xmlStrcat(ret, BAD_CAST "@");
1808
0
        xmlFree(segment);
1809
0
    }
1810
1811
0
    if (uri->server) {
1812
0
        segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1813
0
        NULLCHK(segment)
1814
0
        if (uri->user == NULL)
1815
0
            ret = xmlStrcat(ret, BAD_CAST "//");
1816
0
        ret = xmlStrcat(ret, segment);
1817
0
        xmlFree(segment);
1818
0
    }
1819
1820
0
    if (uri->port) {
1821
0
        xmlChar port[10];
1822
1823
0
        snprintf((char *) port, 10, "%d", uri->port);
1824
0
        ret = xmlStrcat(ret, BAD_CAST ":");
1825
0
        ret = xmlStrcat(ret, port);
1826
0
    }
1827
1828
0
    if (uri->path) {
1829
0
        segment =
1830
0
            xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1831
0
        NULLCHK(segment)
1832
0
        ret = xmlStrcat(ret, segment);
1833
0
        xmlFree(segment);
1834
0
    }
1835
1836
0
    if (uri->query_raw) {
1837
0
        ret = xmlStrcat(ret, BAD_CAST "?");
1838
0
        ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1839
0
    }
1840
0
    else if (uri->query) {
1841
0
        segment =
1842
0
            xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1843
0
        NULLCHK(segment)
1844
0
        ret = xmlStrcat(ret, BAD_CAST "?");
1845
0
        ret = xmlStrcat(ret, segment);
1846
0
        xmlFree(segment);
1847
0
    }
1848
1849
0
    if (uri->opaque) {
1850
0
        segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1851
0
        NULLCHK(segment)
1852
0
        ret = xmlStrcat(ret, segment);
1853
0
        xmlFree(segment);
1854
0
    }
1855
1856
0
    if (uri->fragment) {
1857
0
        segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1858
0
        NULLCHK(segment)
1859
0
        ret = xmlStrcat(ret, BAD_CAST "#");
1860
0
        ret = xmlStrcat(ret, segment);
1861
0
        xmlFree(segment);
1862
0
    }
1863
1864
0
    xmlFreeURI(uri);
1865
0
#undef NULLCHK
1866
1867
0
    return (ret);
1868
0
}
1869
1870
/************************************************************************
1871
 *                  *
1872
 *      Public functions        *
1873
 *                  *
1874
 ************************************************************************/
1875
1876
/**
1877
 * xmlBuildURI:
1878
 * @URI:  the URI instance found in the document
1879
 * @base:  the base value
1880
 *
1881
 * Computes he final URI of the reference done by checking that
1882
 * the given URI is valid, and building the final URI using the
1883
 * base URI. This is processed according to section 5.2 of the
1884
 * RFC 2396
1885
 *
1886
 * 5.2. Resolving Relative References to Absolute Form
1887
 *
1888
 * Returns a new URI string (to be freed by the caller) or NULL in case
1889
 *         of error.
1890
 */
1891
xmlChar *
1892
1.10k
xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1893
1.10k
    xmlChar *val = NULL;
1894
1.10k
    int ret, len, indx, cur, out;
1895
1.10k
    xmlURIPtr ref = NULL;
1896
1.10k
    xmlURIPtr bas = NULL;
1897
1.10k
    xmlURIPtr res = NULL;
1898
1899
    /*
1900
     * 1) The URI reference is parsed into the potential four components and
1901
     *    fragment identifier, as described in Section 4.3.
1902
     *
1903
     *    NOTE that a completely empty URI is treated by modern browsers
1904
     *    as a reference to "." rather than as a synonym for the current
1905
     *    URI.  Should we do that here?
1906
     */
1907
1.10k
    if (URI == NULL)
1908
0
  ret = -1;
1909
1.10k
    else {
1910
1.10k
  if (*URI) {
1911
971
      ref = xmlCreateURI();
1912
971
      if (ref == NULL)
1913
0
    goto done;
1914
971
      ret = xmlParseURIReference(ref, (const char *) URI);
1915
971
  }
1916
136
  else
1917
136
      ret = 0;
1918
1.10k
    }
1919
1.10k
    if (ret != 0)
1920
112
  goto done;
1921
995
    if ((ref != NULL) && (ref->scheme != NULL)) {
1922
  /*
1923
   * The URI is absolute don't modify.
1924
   */
1925
17
  val = xmlStrdup(URI);
1926
17
  goto done;
1927
17
    }
1928
978
    if (base == NULL)
1929
978
  ret = -1;
1930
0
    else {
1931
0
  bas = xmlCreateURI();
1932
0
  if (bas == NULL)
1933
0
      goto done;
1934
0
  ret = xmlParseURIReference(bas, (const char *) base);
1935
0
    }
1936
978
    if (ret != 0) {
1937
978
  if (ref)
1938
842
      val = xmlSaveUri(ref);
1939
978
  goto done;
1940
978
    }
1941
0
    if (ref == NULL) {
1942
  /*
1943
   * the base fragment must be ignored
1944
   */
1945
0
  if (bas->fragment != NULL) {
1946
0
      xmlFree(bas->fragment);
1947
0
      bas->fragment = NULL;
1948
0
  }
1949
0
  val = xmlSaveUri(bas);
1950
0
  goto done;
1951
0
    }
1952
1953
    /*
1954
     * 2) If the path component is empty and the scheme, authority, and
1955
     *    query components are undefined, then it is a reference to the
1956
     *    current document and we are done.  Otherwise, the reference URI's
1957
     *    query and fragment components are defined as found (or not found)
1958
     *    within the URI reference and not inherited from the base URI.
1959
     *
1960
     *    NOTE that in modern browsers, the parsing differs from the above
1961
     *    in the following aspect:  the query component is allowed to be
1962
     *    defined while still treating this as a reference to the current
1963
     *    document.
1964
     */
1965
0
    res = xmlCreateURI();
1966
0
    if (res == NULL)
1967
0
  goto done;
1968
0
    if ((ref->scheme == NULL) && (ref->path == NULL) &&
1969
0
  ((ref->authority == NULL) && (ref->server == NULL))) {
1970
0
  if (bas->scheme != NULL)
1971
0
      res->scheme = xmlMemStrdup(bas->scheme);
1972
0
  if (bas->authority != NULL)
1973
0
      res->authority = xmlMemStrdup(bas->authority);
1974
0
  else if ((bas->server != NULL) || (bas->port == -1)) {
1975
0
      if (bas->server != NULL)
1976
0
    res->server = xmlMemStrdup(bas->server);
1977
0
      if (bas->user != NULL)
1978
0
    res->user = xmlMemStrdup(bas->user);
1979
0
      res->port = bas->port;
1980
0
  }
1981
0
  if (bas->path != NULL)
1982
0
      res->path = xmlMemStrdup(bas->path);
1983
0
  if (ref->query_raw != NULL)
1984
0
      res->query_raw = xmlMemStrdup (ref->query_raw);
1985
0
  else if (ref->query != NULL)
1986
0
      res->query = xmlMemStrdup(ref->query);
1987
0
  else if (bas->query_raw != NULL)
1988
0
      res->query_raw = xmlMemStrdup(bas->query_raw);
1989
0
  else if (bas->query != NULL)
1990
0
      res->query = xmlMemStrdup(bas->query);
1991
0
  if (ref->fragment != NULL)
1992
0
      res->fragment = xmlMemStrdup(ref->fragment);
1993
0
  goto step_7;
1994
0
    }
1995
1996
    /*
1997
     * 3) If the scheme component is defined, indicating that the reference
1998
     *    starts with a scheme name, then the reference is interpreted as an
1999
     *    absolute URI and we are done.  Otherwise, the reference URI's
2000
     *    scheme is inherited from the base URI's scheme component.
2001
     */
2002
0
    if (ref->scheme != NULL) {
2003
0
  val = xmlSaveUri(ref);
2004
0
  goto done;
2005
0
    }
2006
0
    if (bas->scheme != NULL)
2007
0
  res->scheme = xmlMemStrdup(bas->scheme);
2008
2009
0
    if (ref->query_raw != NULL)
2010
0
  res->query_raw = xmlMemStrdup(ref->query_raw);
2011
0
    else if (ref->query != NULL)
2012
0
  res->query = xmlMemStrdup(ref->query);
2013
0
    if (ref->fragment != NULL)
2014
0
  res->fragment = xmlMemStrdup(ref->fragment);
2015
2016
    /*
2017
     * 4) If the authority component is defined, then the reference is a
2018
     *    network-path and we skip to step 7.  Otherwise, the reference
2019
     *    URI's authority is inherited from the base URI's authority
2020
     *    component, which will also be undefined if the URI scheme does not
2021
     *    use an authority component.
2022
     */
2023
0
    if ((ref->authority != NULL) || (ref->server != NULL)) {
2024
0
  if (ref->authority != NULL)
2025
0
      res->authority = xmlMemStrdup(ref->authority);
2026
0
  else {
2027
0
      res->server = xmlMemStrdup(ref->server);
2028
0
      if (ref->user != NULL)
2029
0
    res->user = xmlMemStrdup(ref->user);
2030
0
            res->port = ref->port;
2031
0
  }
2032
0
  if (ref->path != NULL)
2033
0
      res->path = xmlMemStrdup(ref->path);
2034
0
  goto step_7;
2035
0
    }
2036
0
    if (bas->authority != NULL)
2037
0
  res->authority = xmlMemStrdup(bas->authority);
2038
0
    else if ((bas->server != NULL) || (bas->port == -1)) {
2039
0
  if (bas->server != NULL)
2040
0
      res->server = xmlMemStrdup(bas->server);
2041
0
  if (bas->user != NULL)
2042
0
      res->user = xmlMemStrdup(bas->user);
2043
0
  res->port = bas->port;
2044
0
    }
2045
2046
    /*
2047
     * 5) If the path component begins with a slash character ("/"), then
2048
     *    the reference is an absolute-path and we skip to step 7.
2049
     */
2050
0
    if ((ref->path != NULL) && (ref->path[0] == '/')) {
2051
0
  res->path = xmlMemStrdup(ref->path);
2052
0
  goto step_7;
2053
0
    }
2054
2055
2056
    /*
2057
     * 6) If this step is reached, then we are resolving a relative-path
2058
     *    reference.  The relative path needs to be merged with the base
2059
     *    URI's path.  Although there are many ways to do this, we will
2060
     *    describe a simple method using a separate string buffer.
2061
     *
2062
     * Allocate a buffer large enough for the result string.
2063
     */
2064
0
    len = 2; /* extra / and 0 */
2065
0
    if (ref->path != NULL)
2066
0
  len += strlen(ref->path);
2067
0
    if (bas->path != NULL)
2068
0
  len += strlen(bas->path);
2069
0
    res->path = (char *) xmlMallocAtomic(len);
2070
0
    if (res->path == NULL) {
2071
0
        xmlURIErrMemory("resolving URI against base\n");
2072
0
  goto done;
2073
0
    }
2074
0
    res->path[0] = 0;
2075
2076
    /*
2077
     * a) All but the last segment of the base URI's path component is
2078
     *    copied to the buffer.  In other words, any characters after the
2079
     *    last (right-most) slash character, if any, are excluded.
2080
     */
2081
0
    cur = 0;
2082
0
    out = 0;
2083
0
    if (bas->path != NULL) {
2084
0
  while (bas->path[cur] != 0) {
2085
0
      while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2086
0
    cur++;
2087
0
      if (bas->path[cur] == 0)
2088
0
    break;
2089
2090
0
      cur++;
2091
0
      while (out < cur) {
2092
0
    res->path[out] = bas->path[out];
2093
0
    out++;
2094
0
      }
2095
0
  }
2096
0
    }
2097
0
    res->path[out] = 0;
2098
2099
    /*
2100
     * b) The reference's path component is appended to the buffer
2101
     *    string.
2102
     */
2103
0
    if (ref->path != NULL && ref->path[0] != 0) {
2104
0
  indx = 0;
2105
  /*
2106
   * Ensure the path includes a '/'
2107
   */
2108
0
  if ((out == 0) && (bas->server != NULL))
2109
0
      res->path[out++] = '/';
2110
0
  while (ref->path[indx] != 0) {
2111
0
      res->path[out++] = ref->path[indx++];
2112
0
  }
2113
0
    }
2114
0
    res->path[out] = 0;
2115
2116
    /*
2117
     * Steps c) to h) are really path normalization steps
2118
     */
2119
0
    xmlNormalizeURIPath(res->path);
2120
2121
0
step_7:
2122
2123
    /*
2124
     * 7) The resulting URI components, including any inherited from the
2125
     *    base URI, are recombined to give the absolute form of the URI
2126
     *    reference.
2127
     */
2128
0
    val = xmlSaveUri(res);
2129
2130
1.10k
done:
2131
1.10k
    if (ref != NULL)
2132
971
  xmlFreeURI(ref);
2133
1.10k
    if (bas != NULL)
2134
0
  xmlFreeURI(bas);
2135
1.10k
    if (res != NULL)
2136
0
  xmlFreeURI(res);
2137
1.10k
    return(val);
2138
0
}
2139
2140
/**
2141
 * xmlBuildRelativeURI:
2142
 * @URI:  the URI reference under consideration
2143
 * @base:  the base value
2144
 *
2145
 * Expresses the URI of the reference in terms relative to the
2146
 * base.  Some examples of this operation include:
2147
 *     base = "http://site1.com/docs/book1.html"
2148
 *        URI input                        URI returned
2149
 *     docs/pic1.gif                    pic1.gif
2150
 *     docs/img/pic1.gif                img/pic1.gif
2151
 *     img/pic1.gif                     ../img/pic1.gif
2152
 *     http://site1.com/docs/pic1.gif   pic1.gif
2153
 *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2154
 *
2155
 *     base = "docs/book1.html"
2156
 *        URI input                        URI returned
2157
 *     docs/pic1.gif                    pic1.gif
2158
 *     docs/img/pic1.gif                img/pic1.gif
2159
 *     img/pic1.gif                     ../img/pic1.gif
2160
 *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2161
 *
2162
 *
2163
 * Note: if the URI reference is really weird or complicated, it may be
2164
 *       worthwhile to first convert it into a "nice" one by calling
2165
 *       xmlBuildURI (using 'base') before calling this routine,
2166
 *       since this routine (for reasonable efficiency) assumes URI has
2167
 *       already been through some validation.
2168
 *
2169
 * Returns a new URI string (to be freed by the caller) or NULL in case
2170
 * error.
2171
 */
2172
xmlChar *
2173
xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2174
0
{
2175
0
    xmlChar *val = NULL;
2176
0
    int ret;
2177
0
    int ix;
2178
0
    int nbslash = 0;
2179
0
    int len;
2180
0
    xmlURIPtr ref = NULL;
2181
0
    xmlURIPtr bas = NULL;
2182
0
    xmlChar *bptr, *uptr, *vptr;
2183
0
    int remove_path = 0;
2184
2185
0
    if ((URI == NULL) || (*URI == 0))
2186
0
  return NULL;
2187
2188
    /*
2189
     * First parse URI into a standard form
2190
     */
2191
0
    ref = xmlCreateURI ();
2192
0
    if (ref == NULL)
2193
0
  return NULL;
2194
    /* If URI not already in "relative" form */
2195
0
    if (URI[0] != '.') {
2196
0
  ret = xmlParseURIReference (ref, (const char *) URI);
2197
0
  if (ret != 0)
2198
0
      goto done;   /* Error in URI, return NULL */
2199
0
    } else
2200
0
  ref->path = (char *)xmlStrdup(URI);
2201
2202
    /*
2203
     * Next parse base into the same standard form
2204
     */
2205
0
    if ((base == NULL) || (*base == 0)) {
2206
0
  val = xmlStrdup (URI);
2207
0
  goto done;
2208
0
    }
2209
0
    bas = xmlCreateURI ();
2210
0
    if (bas == NULL)
2211
0
  goto done;
2212
0
    if (base[0] != '.') {
2213
0
  ret = xmlParseURIReference (bas, (const char *) base);
2214
0
  if (ret != 0)
2215
0
      goto done;   /* Error in base, return NULL */
2216
0
    } else
2217
0
  bas->path = (char *)xmlStrdup(base);
2218
2219
    /*
2220
     * If the scheme / server on the URI differs from the base,
2221
     * just return the URI
2222
     */
2223
0
    if ((ref->scheme != NULL) &&
2224
0
  ((bas->scheme == NULL) ||
2225
0
   (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2226
0
   (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2227
0
  val = xmlStrdup (URI);
2228
0
  goto done;
2229
0
    }
2230
0
    if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2231
0
  val = xmlStrdup(BAD_CAST "");
2232
0
  goto done;
2233
0
    }
2234
0
    if (bas->path == NULL) {
2235
0
  val = xmlStrdup((xmlChar *)ref->path);
2236
0
  goto done;
2237
0
    }
2238
0
    if (ref->path == NULL) {
2239
0
        ref->path = (char *) "/";
2240
0
  remove_path = 1;
2241
0
    }
2242
2243
    /*
2244
     * At this point (at last!) we can compare the two paths
2245
     *
2246
     * First we take care of the special case where either of the
2247
     * two path components may be missing (bug 316224)
2248
     */
2249
0
    bptr = (xmlChar *)bas->path;
2250
0
    {
2251
0
        xmlChar *rptr = (xmlChar *) ref->path;
2252
0
        int pos = 0;
2253
2254
        /*
2255
         * Next we compare the two strings and find where they first differ
2256
         */
2257
0
  if ((*rptr == '.') && (rptr[1] == '/'))
2258
0
            rptr += 2;
2259
0
  if ((*bptr == '.') && (bptr[1] == '/'))
2260
0
            bptr += 2;
2261
0
  else if ((*bptr == '/') && (*rptr != '/'))
2262
0
      bptr++;
2263
0
  while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2264
0
      pos++;
2265
2266
0
  if (bptr[pos] == rptr[pos]) {
2267
0
      val = xmlStrdup(BAD_CAST "");
2268
0
      goto done;    /* (I can't imagine why anyone would do this) */
2269
0
  }
2270
2271
  /*
2272
   * In URI, "back up" to the last '/' encountered.  This will be the
2273
   * beginning of the "unique" suffix of URI
2274
   */
2275
0
  ix = pos;
2276
0
  for (; ix > 0; ix--) {
2277
0
      if (rptr[ix - 1] == '/')
2278
0
    break;
2279
0
  }
2280
0
  uptr = (xmlChar *)&rptr[ix];
2281
2282
  /*
2283
   * In base, count the number of '/' from the differing point
2284
   */
2285
0
  for (; bptr[ix] != 0; ix++) {
2286
0
      if (bptr[ix] == '/')
2287
0
    nbslash++;
2288
0
  }
2289
2290
  /*
2291
   * e.g: URI="foo/" base="foo/bar" -> "./"
2292
   */
2293
0
  if (nbslash == 0 && !uptr[0]) {
2294
0
      val = xmlStrdup(BAD_CAST "./");
2295
0
      goto done;
2296
0
  }
2297
2298
0
  len = xmlStrlen (uptr) + 1;
2299
0
    }
2300
2301
0
    if (nbslash == 0) {
2302
0
  if (uptr != NULL)
2303
      /* exception characters from xmlSaveUri */
2304
0
      val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2305
0
  goto done;
2306
0
    }
2307
2308
    /*
2309
     * Allocate just enough space for the returned string -
2310
     * length of the remainder of the URI, plus enough space
2311
     * for the "../" groups, plus one for the terminator
2312
     */
2313
0
    val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2314
0
    if (val == NULL) {
2315
0
        xmlURIErrMemory("building relative URI\n");
2316
0
  goto done;
2317
0
    }
2318
0
    vptr = val;
2319
    /*
2320
     * Put in as many "../" as needed
2321
     */
2322
0
    for (; nbslash>0; nbslash--) {
2323
0
  *vptr++ = '.';
2324
0
  *vptr++ = '.';
2325
0
  *vptr++ = '/';
2326
0
    }
2327
    /*
2328
     * Finish up with the end of the URI
2329
     */
2330
0
    if (uptr != NULL) {
2331
0
        if ((vptr > val) && (len > 0) &&
2332
0
      (uptr[0] == '/') && (vptr[-1] == '/')) {
2333
0
      memcpy (vptr, uptr + 1, len - 1);
2334
0
      vptr[len - 2] = 0;
2335
0
  } else {
2336
0
      memcpy (vptr, uptr, len);
2337
0
      vptr[len - 1] = 0;
2338
0
  }
2339
0
    } else {
2340
0
  vptr[len - 1] = 0;
2341
0
    }
2342
2343
    /* escape the freshly-built path */
2344
0
    vptr = val;
2345
  /* exception characters from xmlSaveUri */
2346
0
    val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2347
0
    xmlFree(vptr);
2348
2349
0
done:
2350
    /*
2351
     * Free the working variables
2352
     */
2353
0
    if (remove_path != 0)
2354
0
        ref->path = NULL;
2355
0
    if (ref != NULL)
2356
0
  xmlFreeURI (ref);
2357
0
    if (bas != NULL)
2358
0
  xmlFreeURI (bas);
2359
2360
0
    return val;
2361
0
}
2362
2363
/**
2364
 * xmlCanonicPath:
2365
 * @path:  the resource locator in a filesystem notation
2366
 *
2367
 * Constructs a canonic path from the specified path.
2368
 *
2369
 * Returns a new canonic path, or a duplicate of the path parameter if the
2370
 * construction fails. The caller is responsible for freeing the memory occupied
2371
 * by the returned string. If there is insufficient memory available, or the
2372
 * argument is NULL, the function returns NULL.
2373
 */
2374
#define IS_WINDOWS_PATH(p)          \
2375
  ((p != NULL) &&           \
2376
   (((p[0] >= 'a') && (p[0] <= 'z')) ||     \
2377
    ((p[0] >= 'A') && (p[0] <= 'Z'))) &&      \
2378
   (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2379
xmlChar *
2380
xmlCanonicPath(const xmlChar *path)
2381
0
{
2382
/*
2383
 * For Windows implementations, additional work needs to be done to
2384
 * replace backslashes in pathnames with "forward slashes"
2385
 */
2386
#if defined(_WIN32)
2387
    int len = 0;
2388
    char *p = NULL;
2389
#endif
2390
0
    xmlURIPtr uri;
2391
0
    xmlChar *ret;
2392
0
    const xmlChar *absuri;
2393
2394
0
    if (path == NULL)
2395
0
  return(NULL);
2396
2397
#if defined(_WIN32)
2398
    /*
2399
     * We must not change the backslashes to slashes if the the path
2400
     * starts with \\?\
2401
     * Those paths can be up to 32k characters long.
2402
     * Was added specifically for OpenOffice, those paths can't be converted
2403
     * to URIs anyway.
2404
     */
2405
    if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2406
        (path[3] == '\\') )
2407
  return xmlStrdup((const xmlChar *) path);
2408
#endif
2409
2410
  /* sanitize filename starting with // so it can be used as URI */
2411
0
    if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2412
0
        path++;
2413
2414
0
    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2415
0
  xmlFreeURI(uri);
2416
0
  return xmlStrdup(path);
2417
0
    }
2418
2419
    /* Check if this is an "absolute uri" */
2420
0
    absuri = xmlStrstr(path, BAD_CAST "://");
2421
0
    if (absuri != NULL) {
2422
0
        int l, j;
2423
0
  unsigned char c;
2424
0
  xmlChar *escURI;
2425
2426
        /*
2427
   * this looks like an URI where some parts have not been
2428
   * escaped leading to a parsing problem.  Check that the first
2429
   * part matches a protocol.
2430
   */
2431
0
  l = absuri - path;
2432
  /* Bypass if first part (part before the '://') is > 20 chars */
2433
0
  if ((l <= 0) || (l > 20))
2434
0
      goto path_processing;
2435
  /* Bypass if any non-alpha characters are present in first part */
2436
0
  for (j = 0;j < l;j++) {
2437
0
      c = path[j];
2438
0
      if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2439
0
          goto path_processing;
2440
0
  }
2441
2442
  /* Escape all except the characters specified in the supplied path */
2443
0
        escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2444
0
  if (escURI != NULL) {
2445
      /* Try parsing the escaped path */
2446
0
      uri = xmlParseURI((const char *) escURI);
2447
      /* If successful, return the escaped string */
2448
0
      if (uri != NULL) {
2449
0
          xmlFreeURI(uri);
2450
0
    return escURI;
2451
0
      }
2452
0
            xmlFree(escURI);
2453
0
  }
2454
0
    }
2455
2456
0
path_processing:
2457
/* For Windows implementations, replace backslashes with 'forward slashes' */
2458
#if defined(_WIN32)
2459
    /*
2460
     * Create a URI structure
2461
     */
2462
    uri = xmlCreateURI();
2463
    if (uri == NULL) {    /* Guard against 'out of memory' */
2464
        return(NULL);
2465
    }
2466
2467
    len = xmlStrlen(path);
2468
    if ((len > 2) && IS_WINDOWS_PATH(path)) {
2469
        /* make the scheme 'file' */
2470
  uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2471
  /* allocate space for leading '/' + path + string terminator */
2472
  uri->path = xmlMallocAtomic(len + 2);
2473
  if (uri->path == NULL) {
2474
      xmlFreeURI(uri);  /* Guard against 'out of memory' */
2475
      return(NULL);
2476
  }
2477
  /* Put in leading '/' plus path */
2478
  uri->path[0] = '/';
2479
  p = uri->path + 1;
2480
  strncpy(p, (char *) path, len + 1);
2481
    } else {
2482
  uri->path = (char *) xmlStrdup(path);
2483
  if (uri->path == NULL) {
2484
      xmlFreeURI(uri);
2485
      return(NULL);
2486
  }
2487
  p = uri->path;
2488
    }
2489
    /* Now change all occurrences of '\' to '/' */
2490
    while (*p != '\0') {
2491
  if (*p == '\\')
2492
      *p = '/';
2493
  p++;
2494
    }
2495
2496
    if (uri->scheme == NULL) {
2497
  ret = xmlStrdup((const xmlChar *) uri->path);
2498
    } else {
2499
  ret = xmlSaveUri(uri);
2500
    }
2501
2502
    xmlFreeURI(uri);
2503
#else
2504
0
    ret = xmlStrdup((const xmlChar *) path);
2505
0
#endif
2506
0
    return(ret);
2507
0
}
2508
2509
/**
2510
 * xmlPathToURI:
2511
 * @path:  the resource locator in a filesystem notation
2512
 *
2513
 * Constructs an URI expressing the existing path
2514
 *
2515
 * Returns a new URI, or a duplicate of the path parameter if the
2516
 * construction fails. The caller is responsible for freeing the memory
2517
 * occupied by the returned string. If there is insufficient memory available,
2518
 * or the argument is NULL, the function returns NULL.
2519
 */
2520
xmlChar *
2521
xmlPathToURI(const xmlChar *path)
2522
0
{
2523
0
    xmlURIPtr uri;
2524
0
    xmlURI temp;
2525
0
    xmlChar *ret, *cal;
2526
2527
0
    if (path == NULL)
2528
0
        return(NULL);
2529
2530
0
    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2531
0
  xmlFreeURI(uri);
2532
0
  return xmlStrdup(path);
2533
0
    }
2534
0
    cal = xmlCanonicPath(path);
2535
0
    if (cal == NULL)
2536
0
        return(NULL);
2537
#if defined(_WIN32)
2538
    /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2539
       If 'cal' is a valid URI already then we are done here, as continuing would make
2540
       it invalid. */
2541
    if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2542
  xmlFreeURI(uri);
2543
  return cal;
2544
    }
2545
    /* 'cal' can contain a relative path with backslashes. If that is processed
2546
       by xmlSaveURI, they will be escaped and the external entity loader machinery
2547
       will fail. So convert them to slashes. Misuse 'ret' for walking. */
2548
    ret = cal;
2549
    while (*ret != '\0') {
2550
  if (*ret == '\\')
2551
      *ret = '/';
2552
  ret++;
2553
    }
2554
#endif
2555
0
    memset(&temp, 0, sizeof(temp));
2556
0
    temp.path = (char *) cal;
2557
0
    ret = xmlSaveUri(&temp);
2558
0
    xmlFree(cal);
2559
0
    return(ret);
2560
0
}