Coverage Report

Created: 2026-03-12 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxml2-2.9.7/uri.c
Line
Count
Source
1
/**
2
 * uri.c: set of generic URI related routines
3
 *
4
 * Reference: RFCs 3986, 2732 and 2373
5
 *
6
 * See Copyright for the status of this software.
7
 *
8
 * daniel@veillard.com
9
 */
10
11
#define IN_LIBXML
12
#include "libxml.h"
13
14
#include <string.h>
15
16
#include <libxml/xmlmemory.h>
17
#include <libxml/uri.h>
18
#include <libxml/globals.h>
19
#include <libxml/xmlerror.h>
20
21
/**
22
 * MAX_URI_LENGTH:
23
 *
24
 * The definition of the URI regexp in the above RFC has no size limit
25
 * In practice they are usually relatively short except for the
26
 * data URI scheme as defined in RFC 2397. Even for data URI the usual
27
 * maximum size before hitting random practical limits is around 64 KB
28
 * and 4KB is usually a maximum admitted limit for proper operations.
29
 * The value below is more a security limit than anything else and
30
 * really should never be hit by 'normal' operations
31
 * Set to 1 MByte in 2012, this is only enforced on output
32
 */
33
3.52k
#define MAX_URI_LENGTH (1024 * 1024) /* parenthesized: safe inside larger expressions */
34
35
static void
36
xmlURIErrMemory(const char *extra)
37
0
{
38
0
    if (extra)
39
0
        __xmlRaiseError(NULL, NULL, NULL,
40
0
                        NULL, NULL, XML_FROM_URI,
41
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
42
0
                        extra, NULL, NULL, 0, 0,
43
0
                        "Memory allocation failed : %s\n", extra);
44
0
    else
45
0
        __xmlRaiseError(NULL, NULL, NULL,
46
0
                        NULL, NULL, XML_FROM_URI,
47
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
48
0
                        NULL, NULL, NULL, 0, 0,
49
0
                        "Memory allocation failed\n");
50
0
}
51
52
static void xmlCleanURI(xmlURIPtr uri);
53
54
/*
 * Character classes from RFC 2396, used by the legacy handling code.
 * All IS_xxx(x) macros take a character value, except IS_UNWISE(p)
 * and NEXT(p) which take a pointer. Arguments may be evaluated more
 * than once, so they must be free of side effects.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 */
#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to next pointer char, handle escaped sequences: a '%' advances
 * the pointer by three characters, anything else by one. The macro does
 * not check what follows the '%'; @p must be a modifiable lvalue.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first @n bytes of @s through xmlStrndup(), as a char *. */
#define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
139
140
/************************************************************************
141
 *                  *
142
 *                         RFC 3986 parser        *
143
 *                  *
144
 ************************************************************************/
145
146
11.8M
/*
 * Character-class predicates for the RFC 3986 grammar. Every macro
 * takes a pointer and inspects the character(s) it points to; the
 * argument may be evaluated several times.
 */

/*    DIGIT         = "0" - "9" */
#define ISA_DIGIT(p) (('0' <= *(p)) && (*(p) <= '9'))

/*    ALPHA         = "a" - "z" / "A" - "Z" */
#define ISA_ALPHA(p)                                            \
      ((('a' <= *(p)) && (*(p) <= 'z')) ||                      \
       (('A' <= *(p)) && (*(p) <= 'Z')))

/*    HEXDIG        = DIGIT / "a" - "f" / "A" - "F" */
#define ISA_HEXDIG(p)                                           \
      (ISA_DIGIT(p) ||                                          \
       (('a' <= *(p)) && (*(p) <= 'f')) ||                      \
       (('A' <= *(p)) && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                        \
      ((*(p) == '!') || (*(p) == '$') || (*(p) == '&') ||       \
       (*(p) == '\'') || (*(p) == '(') || (*(p) == ')') ||      \
       (*(p) == '*') || (*(p) == '+') || (*(p) == ',') ||       \
       (*(p) == ';') || (*(p) == '='))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                        \
      ((*(p) == ':') || (*(p) == '/') || (*(p) == '?') ||       \
       (*(p) == '#') || (*(p) == '[') || (*(p) == ']') ||       \
       (*(p) == '@'))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) ((ISA_GEN_DELIM(p)) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                       \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) ||                      \
       (*(p) == '-') || (*(p) == '.') ||                        \
       (*(p) == '_') || (*(p) == '~'))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The tests short-circuit left to right, so a following character is
 * only read once the previous one matched.
 */
#define ISA_PCT_ENCODED(p)                                      \
      ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                            \
      (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) ||               \
       ISA_SUB_DELIM(p) || (*(p) == ':') || (*(p) == '@'))
195
196
/**
197
 * xmlParse3986Scheme:
198
 * @uri:  pointer to an URI structure
199
 * @str:  pointer to the string to analyze
200
 *
201
 * Parse an URI scheme
202
 *
203
 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
204
 *
205
 * Returns 0 or the error code
206
 */
207
static int
208
876k
xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
209
876k
    const char *cur;
210
211
876k
    if (str == NULL)
212
0
  return(-1);
213
214
876k
    cur = *str;
215
876k
    if (!ISA_ALPHA(cur))
216
504k
  return(2);
217
371k
    cur++;
218
1.53M
    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
219
1.16M
           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
220
371k
    if (uri != NULL) {
221
371k
  if (uri->scheme != NULL) xmlFree(uri->scheme);
222
371k
  uri->scheme = STRNDUP(*str, cur - *str);
223
371k
    }
224
371k
    *str = cur;
225
371k
    return(0);
226
876k
}
227
228
/**
229
 * xmlParse3986Fragment:
230
 * @uri:  pointer to an URI structure
231
 * @str:  pointer to the string to analyze
232
 *
233
 * Parse the query part of an URI
234
 *
235
 * fragment      = *( pchar / "/" / "?" )
236
 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237
 *       in the fragment identifier but this is used very broadly for
238
 *       xpointer scheme selection, so we are allowing it here to not break
239
 *       for example all the DocBook processing chains.
240
 *
241
 * Returns 0 or the error code
242
 */
243
static int
244
xmlParse3986Fragment(xmlURIPtr uri, const char **str)
245
48.9k
{
246
48.9k
    const char *cur;
247
248
48.9k
    if (str == NULL)
249
0
        return (-1);
250
251
48.9k
    cur = *str;
252
253
2.14M
    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
254
61.6k
           (*cur == '[') || (*cur == ']') ||
255
48.9k
           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
256
2.10M
        NEXT(cur);
257
48.9k
    if (uri != NULL) {
258
48.9k
        if (uri->fragment != NULL)
259
0
            xmlFree(uri->fragment);
260
48.9k
  if (uri->cleanup & 2)
261
0
      uri->fragment = STRNDUP(*str, cur - *str);
262
48.9k
  else
263
48.9k
      uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
264
48.9k
    }
265
48.9k
    *str = cur;
266
48.9k
    return (0);
267
48.9k
}
268
269
/**
270
 * xmlParse3986Query:
271
 * @uri:  pointer to an URI structure
272
 * @str:  pointer to the string to analyze
273
 *
274
 * Parse the query part of an URI
275
 *
276
 * query = *uric
277
 *
278
 * Returns 0 or the error code
279
 */
280
static int
281
xmlParse3986Query(xmlURIPtr uri, const char **str)
282
64.8k
{
283
64.8k
    const char *cur;
284
285
64.8k
    if (str == NULL)
286
0
        return (-1);
287
288
64.8k
    cur = *str;
289
290
927k
    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
291
64.8k
           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
292
863k
        NEXT(cur);
293
64.8k
    if (uri != NULL) {
294
64.8k
        if (uri->query != NULL)
295
0
            xmlFree(uri->query);
296
64.8k
  if (uri->cleanup & 2)
297
0
      uri->query = STRNDUP(*str, cur - *str);
298
64.8k
  else
299
64.8k
      uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
300
301
  /* Save the raw bytes of the query as well.
302
   * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
303
   */
304
64.8k
  if (uri->query_raw != NULL)
305
0
      xmlFree (uri->query_raw);
306
64.8k
  uri->query_raw = STRNDUP (*str, cur - *str);
307
64.8k
    }
308
64.8k
    *str = cur;
309
64.8k
    return (0);
310
64.8k
}
311
312
/**
313
 * xmlParse3986Port:
314
 * @uri:  pointer to an URI structure
315
 * @str:  the string to analyze
316
 *
317
 * Parse a port part and fills in the appropriate fields
318
 * of the @uri structure
319
 *
320
 * port          = *DIGIT
321
 *
322
 * Returns 0 or the error code
323
 */
324
static int
325
xmlParse3986Port(xmlURIPtr uri, const char **str)
326
23.0k
{
327
23.0k
    const char *cur = *str;
328
23.0k
    unsigned port = 0; /* unsigned for defined overflow behavior */
329
330
23.0k
    if (ISA_DIGIT(cur)) {
331
28.1k
  while (ISA_DIGIT(cur)) {
332
20.7k
      port = port * 10 + (*cur - '0');
333
334
20.7k
      cur++;
335
20.7k
  }
336
7.40k
  if (uri != NULL)
337
7.40k
      uri->port = port & INT_MAX; /* port value modulo INT_MAX+1 */
338
7.40k
  *str = cur;
339
7.40k
  return(0);
340
7.40k
    }
341
15.6k
    return(1);
342
23.0k
}
343
344
/**
345
 * xmlParse3986Userinfo:
346
 * @uri:  pointer to an URI structure
347
 * @str:  the string to analyze
348
 *
349
 * Parse an user informations part and fills in the appropriate fields
350
 * of the @uri structure
351
 *
352
 * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
353
 *
354
 * Returns 0 or the error code
355
 */
356
static int
357
xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
358
185k
{
359
185k
    const char *cur;
360
361
185k
    cur = *str;
362
2.66M
    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
363
402k
           ISA_SUB_DELIM(cur) || (*cur == ':'))
364
2.47M
  NEXT(cur);
365
185k
    if (*cur == '@') {
366
11.3k
  if (uri != NULL) {
367
11.3k
      if (uri->user != NULL) xmlFree(uri->user);
368
11.3k
      if (uri->cleanup & 2)
369
0
    uri->user = STRNDUP(*str, cur - *str);
370
11.3k
      else
371
11.3k
    uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
372
11.3k
  }
373
11.3k
  *str = cur;
374
11.3k
  return(0);
375
11.3k
    }
376
174k
    return(1);
377
185k
}
378
379
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. Multi-digit values with a leading zero and values
 * above 255 are rejected. At most three characters are consumed, and
 * @str is only updated on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    /*
     * A following character is only read once the previous one is known
     * to be a digit, so the scan never runs past the terminating 0.
     */
    if ((cur[1] < '0') || (cur[1] > '9')) {
        cur += 1;                               /* 0-9 */
    } else if (cur[0] == '0') {
        return(1);                              /* no leading zero */
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        cur += 2;                               /* 10-99 */
    } else if (cur[0] == '1') {
        cur += 3;                               /* 100-199 */
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        cur += 3;                               /* 200-249 */
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        cur += 3;                               /* 250-255 */
    } else {
        return(1);                              /* 256 and above */
    }

    *str = cur;
    return(0);
}
416
/**
417
 * xmlParse3986Host:
418
 * @uri:  pointer to an URI structure
419
 * @str:  the string to analyze
420
 *
421
 * Parse an host part and fills in the appropriate fields
422
 * of the @uri structure
423
 *
424
 * host          = IP-literal / IPv4address / reg-name
425
 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
426
 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
427
 * reg-name      = *( unreserved / pct-encoded / sub-delims )
428
 *
429
 * Returns 0 or the error code
430
 */
431
static int
432
xmlParse3986Host(xmlURIPtr uri, const char **str)
433
185k
{
434
185k
    const char *cur = *str;
435
185k
    const char *host;
436
437
185k
    host = cur;
438
    /*
439
     * IPv6 and future adressing scheme are enclosed between brackets
440
     */
441
185k
    if (*cur == '[') {
442
4.96k
        cur++;
443
806k
  while ((*cur != ']') && (*cur != 0))
444
801k
      cur++;
445
4.96k
  if (*cur != ']')
446
2.92k
      return(1);
447
2.03k
  cur++;
448
2.03k
  goto found;
449
4.96k
    }
450
    /*
451
     * try to parse an IPv4
452
     */
453
180k
    if (ISA_DIGIT(cur)) {
454
16.6k
        if (xmlParse3986DecOctet(&cur) != 0)
455
1.63k
      goto not_ipv4;
456
15.0k
  if (*cur != '.')
457
12.1k
      goto not_ipv4;
458
2.88k
  cur++;
459
2.88k
        if (xmlParse3986DecOctet(&cur) != 0)
460
1.57k
      goto not_ipv4;
461
1.30k
  if (*cur != '.')
462
1.22k
      goto not_ipv4;
463
76
        if (xmlParse3986DecOctet(&cur) != 0)
464
76
      goto not_ipv4;
465
0
  if (*cur != '.')
466
0
      goto not_ipv4;
467
0
        if (xmlParse3986DecOctet(&cur) != 0)
468
0
      goto not_ipv4;
469
0
  goto found;
470
16.6k
not_ipv4:
471
16.6k
        cur = *str;
472
16.6k
    }
473
    /*
474
     * then this should be a hostname which can be empty
475
     */
476
2.44M
    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
477
2.26M
        NEXT(cur);
478
182k
found:
479
182k
    if (uri != NULL) {
480
182k
  if (uri->authority != NULL) xmlFree(uri->authority);
481
182k
  uri->authority = NULL;
482
182k
  if (uri->server != NULL) xmlFree(uri->server);
483
182k
  if (cur != host) {
484
162k
      if (uri->cleanup & 2)
485
0
    uri->server = STRNDUP(host, cur - host);
486
162k
      else
487
162k
    uri->server = xmlURIUnescapeString(host, cur - host, NULL);
488
162k
  } else
489
19.5k
      uri->server = NULL;
490
182k
    }
491
182k
    *str = cur;
492
182k
    return(0);
493
180k
}
494
495
/**
496
 * xmlParse3986Authority:
497
 * @uri:  pointer to an URI structure
498
 * @str:  the string to analyze
499
 *
500
 * Parse an authority part and fills in the appropriate fields
501
 * of the @uri structure
502
 *
503
 * authority     = [ userinfo "@" ] host [ ":" port ]
504
 *
505
 * Returns 0 or the error code
506
 */
507
static int
508
xmlParse3986Authority(xmlURIPtr uri, const char **str)
509
185k
{
510
185k
    const char *cur;
511
185k
    int ret;
512
513
185k
    cur = *str;
514
    /*
515
     * try to parse an userinfo and check for the trailing @
516
     */
517
185k
    ret = xmlParse3986Userinfo(uri, &cur);
518
185k
    if ((ret != 0) || (*cur != '@'))
519
174k
        cur = *str;
520
11.3k
    else
521
11.3k
        cur++;
522
185k
    ret = xmlParse3986Host(uri, &cur);
523
185k
    if (ret != 0) return(ret);
524
182k
    if (*cur == ':') {
525
23.0k
        cur++;
526
23.0k
        ret = xmlParse3986Port(uri, &cur);
527
23.0k
  if (ret != 0) return(ret);
528
23.0k
    }
529
166k
    *str = cur;
530
166k
    return(0);
531
182k
}
532
533
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip over a path segment and move @str past it
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Scanning stops at the first character which is not a pchar or which
 * is equal to @forbid.
 *
 * Returns 0 on success, 1 if the input does not start with a pchar
 * and @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *p = *str;

    if (!ISA_PCHAR(p))
        return(empty ? 0 : 1);

    while (ISA_PCHAR(p) && (*p != forbid))
        NEXT(p);

    *str = p;
    return (0);
}
565
566
/**
567
 * xmlParse3986PathAbEmpty:
568
 * @uri:  pointer to an URI structure
569
 * @str:  the string to analyze
570
 *
571
 * Parse an path absolute or empty and fills in the appropriate fields
572
 * of the @uri structure
573
 *
574
 * path-abempty  = *( "/" segment )
575
 *
576
 * Returns 0 or the error code
577
 */
578
static int
579
xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
580
166k
{
581
166k
    const char *cur;
582
166k
    int ret;
583
584
166k
    cur = *str;
585
586
531k
    while (*cur == '/') {
587
364k
        cur++;
588
364k
  ret = xmlParse3986Segment(&cur, 0, 1);
589
364k
  if (ret != 0) return(ret);
590
364k
    }
591
166k
    if (uri != NULL) {
592
166k
  if (uri->path != NULL) xmlFree(uri->path);
593
166k
        if (*str != cur) {
594
81.8k
            if (uri->cleanup & 2)
595
0
                uri->path = STRNDUP(*str, cur - *str);
596
81.8k
            else
597
81.8k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
598
84.9k
        } else {
599
84.9k
            uri->path = NULL;
600
84.9k
        }
601
166k
    }
602
166k
    *str = cur;
603
166k
    return (0);
604
166k
}
605
606
/**
607
 * xmlParse3986PathAbsolute:
608
 * @uri:  pointer to an URI structure
609
 * @str:  the string to analyze
610
 *
611
 * Parse an path absolute and fills in the appropriate fields
612
 * of the @uri structure
613
 *
614
 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
615
 *
616
 * Returns 0 or the error code
617
 */
618
static int
619
xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
620
67.3k
{
621
67.3k
    const char *cur;
622
67.3k
    int ret;
623
624
67.3k
    cur = *str;
625
626
67.3k
    if (*cur != '/')
627
0
        return(1);
628
67.3k
    cur++;
629
67.3k
    ret = xmlParse3986Segment(&cur, 0, 0);
630
67.3k
    if (ret == 0) {
631
66.7k
  while (*cur == '/') {
632
30.3k
      cur++;
633
30.3k
      ret = xmlParse3986Segment(&cur, 0, 1);
634
30.3k
      if (ret != 0) return(ret);
635
30.3k
  }
636
36.4k
    }
637
67.3k
    if (uri != NULL) {
638
67.3k
  if (uri->path != NULL) xmlFree(uri->path);
639
67.3k
        if (cur != *str) {
640
67.3k
            if (uri->cleanup & 2)
641
0
                uri->path = STRNDUP(*str, cur - *str);
642
67.3k
            else
643
67.3k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
644
67.3k
        } else {
645
0
            uri->path = NULL;
646
0
        }
647
67.3k
    }
648
67.3k
    *str = cur;
649
67.3k
    return (0);
650
67.3k
}
651
652
/**
653
 * xmlParse3986PathRootless:
654
 * @uri:  pointer to an URI structure
655
 * @str:  the string to analyze
656
 *
657
 * Parse an path without root and fills in the appropriate fields
658
 * of the @uri structure
659
 *
660
 * path-rootless = segment-nz *( "/" segment )
661
 *
662
 * Returns 0 or the error code
663
 */
664
static int
665
xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
666
96.9k
{
667
96.9k
    const char *cur;
668
96.9k
    int ret;
669
670
96.9k
    cur = *str;
671
672
96.9k
    ret = xmlParse3986Segment(&cur, 0, 0);
673
96.9k
    if (ret != 0) return(ret);
674
145k
    while (*cur == '/') {
675
48.3k
        cur++;
676
48.3k
  ret = xmlParse3986Segment(&cur, 0, 1);
677
48.3k
  if (ret != 0) return(ret);
678
48.3k
    }
679
96.9k
    if (uri != NULL) {
680
96.9k
  if (uri->path != NULL) xmlFree(uri->path);
681
96.9k
        if (cur != *str) {
682
96.9k
            if (uri->cleanup & 2)
683
0
                uri->path = STRNDUP(*str, cur - *str);
684
96.9k
            else
685
96.9k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
686
96.9k
        } else {
687
0
            uri->path = NULL;
688
0
        }
689
96.9k
    }
690
96.9k
    *str = cur;
691
96.9k
    return (0);
692
96.9k
}
693
694
/**
695
 * xmlParse3986PathNoScheme:
696
 * @uri:  pointer to an URI structure
697
 * @str:  the string to analyze
698
 *
699
 * Parse an path which is not a scheme and fills in the appropriate fields
700
 * of the @uri structure
701
 *
702
 * path-noscheme = segment-nz-nc *( "/" segment )
703
 *
704
 * Returns 0 or the error code
705
 */
706
static int
707
xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
708
367k
{
709
367k
    const char *cur;
710
367k
    int ret;
711
712
367k
    cur = *str;
713
714
367k
    ret = xmlParse3986Segment(&cur, ':', 0);
715
367k
    if (ret != 0) return(ret);
716
660k
    while (*cur == '/') {
717
293k
        cur++;
718
293k
  ret = xmlParse3986Segment(&cur, 0, 1);
719
293k
  if (ret != 0) return(ret);
720
293k
    }
721
367k
    if (uri != NULL) {
722
367k
  if (uri->path != NULL) xmlFree(uri->path);
723
367k
        if (cur != *str) {
724
361k
            if (uri->cleanup & 2)
725
0
                uri->path = STRNDUP(*str, cur - *str);
726
361k
            else
727
361k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
728
361k
        } else {
729
5.84k
            uri->path = NULL;
730
5.84k
        }
731
367k
    }
732
367k
    *str = cur;
733
367k
    return (0);
734
367k
}
735
736
/**
737
 * xmlParse3986HierPart:
738
 * @uri:  pointer to an URI structure
739
 * @str:  the string to analyze
740
 *
741
 * Parse an hierarchical part and fills in the appropriate fields
742
 * of the @uri structure
743
 *
744
 * hier-part     = "//" authority path-abempty
745
 *                / path-absolute
746
 *                / path-rootless
747
 *                / path-empty
748
 *
749
 * Returns 0 or the error code
750
 */
751
static int
752
xmlParse3986HierPart(xmlURIPtr uri, const char **str)
753
275k
{
754
275k
    const char *cur;
755
275k
    int ret;
756
757
275k
    cur = *str;
758
759
275k
    if ((*cur == '/') && (*(cur + 1) == '/')) {
760
124k
        cur += 2;
761
124k
  ret = xmlParse3986Authority(uri, &cur);
762
124k
  if (ret != 0) return(ret);
763
111k
  if (uri->server == NULL)
764
3.66k
      uri->port = -1;
765
111k
  ret = xmlParse3986PathAbEmpty(uri, &cur);
766
111k
  if (ret != 0) return(ret);
767
111k
  *str = cur;
768
111k
  return(0);
769
151k
    } else if (*cur == '/') {
770
16.2k
        ret = xmlParse3986PathAbsolute(uri, &cur);
771
16.2k
  if (ret != 0) return(ret);
772
135k
    } else if (ISA_PCHAR(cur)) {
773
96.9k
        ret = xmlParse3986PathRootless(uri, &cur);
774
96.9k
  if (ret != 0) return(ret);
775
96.9k
    } else {
776
  /* path-empty is effectively empty */
777
38.3k
  if (uri != NULL) {
778
38.3k
      if (uri->path != NULL) xmlFree(uri->path);
779
38.3k
      uri->path = NULL;
780
38.3k
  }
781
38.3k
    }
782
151k
    *str = cur;
783
151k
    return (0);
784
275k
}
785
786
/**
787
 * xmlParse3986RelativeRef:
788
 * @uri:  pointer to an URI structure
789
 * @str:  the string to analyze
790
 *
791
 * Parse an URI string and fills in the appropriate fields
792
 * of the @uri structure
793
 *
794
 * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
795
 * relative-part = "//" authority path-abempty
796
 *               / path-absolute
797
 *               / path-noscheme
798
 *               / path-empty
799
 *
800
 * Returns 0 or the error code
801
 */
802
static int
803
730k
xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
804
730k
    int ret;
805
806
730k
    if ((*str == '/') && (*(str + 1) == '/')) {
807
61.0k
        str += 2;
808
61.0k
  ret = xmlParse3986Authority(uri, &str);
809
61.0k
  if (ret != 0) return(ret);
810
55.2k
  ret = xmlParse3986PathAbEmpty(uri, &str);
811
55.2k
  if (ret != 0) return(ret);
812
669k
    } else if (*str == '/') {
813
51.0k
  ret = xmlParse3986PathAbsolute(uri, &str);
814
51.0k
  if (ret != 0) return(ret);
815
618k
    } else if (ISA_PCHAR(str)) {
816
367k
        ret = xmlParse3986PathNoScheme(uri, &str);
817
367k
  if (ret != 0) return(ret);
818
367k
    } else {
819
  /* path-empty is effectively empty */
820
251k
  if (uri != NULL) {
821
251k
      if (uri->path != NULL) xmlFree(uri->path);
822
251k
      uri->path = NULL;
823
251k
  }
824
251k
    }
825
826
725k
    if (*str == '?') {
827
52.5k
  str++;
828
52.5k
  ret = xmlParse3986Query(uri, &str);
829
52.5k
  if (ret != 0) return(ret);
830
52.5k
    }
831
725k
    if (*str == '#') {
832
38.0k
  str++;
833
38.0k
  ret = xmlParse3986Fragment(uri, &str);
834
38.0k
  if (ret != 0) return(ret);
835
38.0k
    }
836
725k
    if (*str != 0) {
837
386k
  xmlCleanURI(uri);
838
386k
  return(1);
839
386k
    }
840
338k
    return(0);
841
725k
}
842
843
844
/**
845
 * xmlParse3986URI:
846
 * @uri:  pointer to an URI structure
847
 * @str:  the string to analyze
848
 *
849
 * Parse an URI string and fills in the appropriate fields
850
 * of the @uri structure
851
 *
852
 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
853
 *
854
 * Returns 0 or the error code
855
 */
856
static int
857
876k
xmlParse3986URI(xmlURIPtr uri, const char *str) {
858
876k
    int ret;
859
860
876k
    ret = xmlParse3986Scheme(uri, &str);
861
876k
    if (ret != 0) return(ret);
862
371k
    if (*str != ':') {
863
95.7k
  return(1);
864
95.7k
    }
865
275k
    str++;
866
275k
    ret = xmlParse3986HierPart(uri, &str);
867
275k
    if (ret != 0) return(ret);
868
263k
    if (*str == '?') {
869
12.2k
  str++;
870
12.2k
  ret = xmlParse3986Query(uri, &str);
871
12.2k
  if (ret != 0) return(ret);
872
12.2k
    }
873
263k
    if (*str == '#') {
874
10.9k
  str++;
875
10.9k
  ret = xmlParse3986Fragment(uri, &str);
876
10.9k
  if (ret != 0) return(ret);
877
10.9k
    }
878
263k
    if (*str != 0) {
879
117k
  xmlCleanURI(uri);
880
117k
  return(1);
881
117k
    }
882
145k
    return(0);
883
263k
}
884
885
/**
886
 * xmlParse3986URIReference:
887
 * @uri:  pointer to an URI structure
888
 * @str:  the string to analyze
889
 *
890
 * Parse an URI reference string and fills in the appropriate fields
891
 * of the @uri structure
892
 *
893
 * URI-reference = URI / relative-ref
894
 *
895
 * Returns 0 or the error code
896
 */
897
static int
898
876k
xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
899
876k
    int ret;
900
901
876k
    if (str == NULL)
902
0
  return(-1);
903
876k
    xmlCleanURI(uri);
904
905
    /*
906
     * Try first to parse absolute refs, then fallback to relative if
907
     * it fails.
908
     */
909
876k
    ret = xmlParse3986URI(uri, str);
910
876k
    if (ret != 0) {
911
730k
  xmlCleanURI(uri);
912
730k
        ret = xmlParse3986RelativeRef(uri, str);
913
730k
  if (ret != 0) {
914
392k
      xmlCleanURI(uri);
915
392k
      return(ret);
916
392k
  }
917
730k
    }
918
483k
    return(0);
919
876k
}
920
921
/**
922
 * xmlParseURI:
923
 * @str:  the URI string to analyze
924
 *
925
 * Parse an URI based on RFC 3986
926
 *
927
 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
928
 *
929
 * Returns a newly built xmlURIPtr or NULL in case of error
930
 */
931
xmlURIPtr
932
864k
xmlParseURI(const char *str) {
933
864k
    xmlURIPtr uri;
934
864k
    int ret;
935
936
864k
    if (str == NULL)
937
0
  return(NULL);
938
864k
    uri = xmlCreateURI();
939
864k
    if (uri != NULL) {
940
864k
  ret = xmlParse3986URIReference(uri, str);
941
864k
        if (ret) {
942
390k
      xmlFreeURI(uri);
943
390k
      return(NULL);
944
390k
  }
945
864k
    }
946
473k
    return(uri);
947
864k
}
948
949
/**
950
 * xmlParseURIReference:
951
 * @uri:  pointer to an URI structure
952
 * @str:  the string to analyze
953
 *
954
 * Parse an URI reference string based on RFC 3986 and fills in the
955
 * appropriate fields of the @uri structure
956
 *
957
 * URI-reference = URI / relative-ref
958
 *
959
 * Returns 0 or the error code
960
 */
961
int
962
12.0k
xmlParseURIReference(xmlURIPtr uri, const char *str) {
963
12.0k
    return(xmlParse3986URIReference(uri, str));
964
12.0k
}
965
966
/**
967
 * xmlParseURIRaw:
968
 * @str:  the URI string to analyze
969
 * @raw:  if 1 unescaping of URI pieces are disabled
970
 *
971
 * Parse an URI but allows to keep intact the original fragments.
972
 *
973
 * URI-reference = URI / relative-ref
974
 *
975
 * Returns a newly built xmlURIPtr or NULL in case of error
976
 */
977
xmlURIPtr
978
0
xmlParseURIRaw(const char *str, int raw) {
979
0
    xmlURIPtr uri;
980
0
    int ret;
981
982
0
    if (str == NULL)
983
0
  return(NULL);
984
0
    uri = xmlCreateURI();
985
0
    if (uri != NULL) {
986
0
        if (raw) {
987
0
      uri->cleanup |= 2;
988
0
  }
989
0
  ret = xmlParseURIReference(uri, str);
990
0
        if (ret) {
991
0
      xmlFreeURI(uri);
992
0
      return(NULL);
993
0
  }
994
0
    }
995
0
    return(uri);
996
0
}
997
998
/************************************************************************
999
 *                  *
1000
 *      Generic URI structure functions     *
1001
 *                  *
1002
 ************************************************************************/
1003
1004
/**
1005
 * xmlCreateURI:
1006
 *
1007
 * Simply creates an empty xmlURI
1008
 *
1009
 * Returns the new structure or NULL in case of error
1010
 */
1011
xmlURIPtr
1012
880k
xmlCreateURI(void) {
1013
880k
    xmlURIPtr ret;
1014
1015
880k
    ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1016
880k
    if (ret == NULL) {
1017
0
        xmlURIErrMemory("creating URI structure\n");
1018
0
  return(NULL);
1019
0
    }
1020
880k
    memset(ret, 0, sizeof(xmlURI));
1021
880k
    return(ret);
1022
880k
}
1023
1024
/**
1025
 * xmlSaveUriRealloc:
1026
 *
1027
 * Function to handle properly a reallocation when saving an URI
1028
 * Also imposes some limit on the length of an URI string output
1029
 */
1030
static xmlChar *
1031
3.52k
xmlSaveUriRealloc(xmlChar *ret, int *max) {
1032
3.52k
    xmlChar *temp;
1033
3.52k
    int tmp;
1034
1035
3.52k
    if (*max > MAX_URI_LENGTH) {
1036
0
        xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1037
0
        return(NULL);
1038
0
    }
1039
3.52k
    tmp = *max * 2;
1040
3.52k
    temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1041
3.52k
    if (temp == NULL) {
1042
0
        xmlURIErrMemory("saving URI\n");
1043
0
        return(NULL);
1044
0
    }
1045
3.52k
    *max = tmp;
1046
3.52k
    return(temp);
1047
3.52k
}
1048
1049
/**
1050
 * xmlSaveUri:
1051
 * @uri:  pointer to an xmlURI
1052
 *
1053
 * Save the URI as an escaped string
1054
 *
1055
 * Returns a new string (to be deallocated by caller)
1056
 */
1057
xmlChar *
1058
6.38k
xmlSaveUri(xmlURIPtr uri) {
1059
6.38k
    xmlChar *ret = NULL;
1060
6.38k
    xmlChar *temp;
1061
6.38k
    const char *p;
1062
6.38k
    int len;
1063
6.38k
    int max;
1064
1065
6.38k
    if (uri == NULL) return(NULL);
1066
1067
1068
6.38k
    max = 80;
1069
6.38k
    ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1070
6.38k
    if (ret == NULL) {
1071
0
        xmlURIErrMemory("saving URI\n");
1072
0
  return(NULL);
1073
0
    }
1074
6.38k
    len = 0;
1075
1076
6.38k
    if (uri->scheme != NULL) {
1077
0
  p = uri->scheme;
1078
0
  while (*p != 0) {
1079
0
      if (len >= max) {
1080
0
                temp = xmlSaveUriRealloc(ret, &max);
1081
0
                if (temp == NULL) goto mem_error;
1082
0
    ret = temp;
1083
0
      }
1084
0
      ret[len++] = *p++;
1085
0
  }
1086
0
  if (len >= max) {
1087
0
            temp = xmlSaveUriRealloc(ret, &max);
1088
0
            if (temp == NULL) goto mem_error;
1089
0
            ret = temp;
1090
0
  }
1091
0
  ret[len++] = ':';
1092
0
    }
1093
6.38k
    if (uri->opaque != NULL) {
1094
0
  p = uri->opaque;
1095
0
  while (*p != 0) {
1096
0
      if (len + 3 >= max) {
1097
0
                temp = xmlSaveUriRealloc(ret, &max);
1098
0
                if (temp == NULL) goto mem_error;
1099
0
                ret = temp;
1100
0
      }
1101
0
      if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1102
0
    ret[len++] = *p++;
1103
0
      else {
1104
0
    int val = *(unsigned char *)p++;
1105
0
    int hi = val / 0x10, lo = val % 0x10;
1106
0
    ret[len++] = '%';
1107
0
    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1108
0
    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1109
0
      }
1110
0
  }
1111
6.38k
    } else {
1112
6.38k
  if ((uri->server != NULL) || (uri->port == -1)) {
1113
2.27k
      if (len + 3 >= max) {
1114
0
                temp = xmlSaveUriRealloc(ret, &max);
1115
0
                if (temp == NULL) goto mem_error;
1116
0
                ret = temp;
1117
0
      }
1118
2.27k
      ret[len++] = '/';
1119
2.27k
      ret[len++] = '/';
1120
2.27k
      if (uri->user != NULL) {
1121
1.43k
    p = uri->user;
1122
639k
    while (*p != 0) {
1123
637k
        if (len + 3 >= max) {
1124
1.22k
                        temp = xmlSaveUriRealloc(ret, &max);
1125
1.22k
                        if (temp == NULL) goto mem_error;
1126
1.22k
                        ret = temp;
1127
1.22k
        }
1128
637k
        if ((IS_UNRESERVED(*(p))) ||
1129
415k
      ((*(p) == ';')) || ((*(p) == ':')) ||
1130
396k
      ((*(p) == '&')) || ((*(p) == '=')) ||
1131
337k
      ((*(p) == '+')) || ((*(p) == '$')) ||
1132
330k
      ((*(p) == ',')))
1133
347k
      ret[len++] = *p++;
1134
290k
        else {
1135
290k
      int val = *(unsigned char *)p++;
1136
290k
      int hi = val / 0x10, lo = val % 0x10;
1137
290k
      ret[len++] = '%';
1138
290k
      ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1139
290k
      ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1140
290k
        }
1141
637k
    }
1142
1.43k
    if (len + 3 >= max) {
1143
17
                    temp = xmlSaveUriRealloc(ret, &max);
1144
17
                    if (temp == NULL) goto mem_error;
1145
17
                    ret = temp;
1146
17
    }
1147
1.43k
    ret[len++] = '@';
1148
1.43k
      }
1149
2.27k
      if (uri->server != NULL) {
1150
2.27k
    p = uri->server;
1151
350k
    while (*p != 0) {
1152
347k
        if (len >= max) {
1153
351
      temp = xmlSaveUriRealloc(ret, &max);
1154
351
      if (temp == NULL) goto mem_error;
1155
351
      ret = temp;
1156
351
        }
1157
347k
        ret[len++] = *p++;
1158
347k
    }
1159
2.27k
    if (uri->port > 0) {
1160
105
        if (len + 10 >= max) {
1161
9
      temp = xmlSaveUriRealloc(ret, &max);
1162
9
      if (temp == NULL) goto mem_error;
1163
9
      ret = temp;
1164
9
        }
1165
105
        len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1166
105
    }
1167
2.27k
      }
1168
4.10k
  } else if (uri->authority != NULL) {
1169
0
      if (len + 3 >= max) {
1170
0
                temp = xmlSaveUriRealloc(ret, &max);
1171
0
                if (temp == NULL) goto mem_error;
1172
0
                ret = temp;
1173
0
      }
1174
0
      ret[len++] = '/';
1175
0
      ret[len++] = '/';
1176
0
      p = uri->authority;
1177
0
      while (*p != 0) {
1178
0
    if (len + 3 >= max) {
1179
0
                    temp = xmlSaveUriRealloc(ret, &max);
1180
0
                    if (temp == NULL) goto mem_error;
1181
0
                    ret = temp;
1182
0
    }
1183
0
    if ((IS_UNRESERVED(*(p))) ||
1184
0
                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1185
0
                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1186
0
                    ((*(p) == '=')) || ((*(p) == '+')))
1187
0
        ret[len++] = *p++;
1188
0
    else {
1189
0
        int val = *(unsigned char *)p++;
1190
0
        int hi = val / 0x10, lo = val % 0x10;
1191
0
        ret[len++] = '%';
1192
0
        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1193
0
        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1194
0
    }
1195
0
      }
1196
4.10k
  } else if (uri->scheme != NULL) {
1197
0
      if (len + 3 >= max) {
1198
0
                temp = xmlSaveUriRealloc(ret, &max);
1199
0
                if (temp == NULL) goto mem_error;
1200
0
                ret = temp;
1201
0
      }
1202
0
  }
1203
6.38k
  if (uri->path != NULL) {
1204
3.68k
      p = uri->path;
1205
      /*
1206
       * the colon in file:///d: should not be escaped or
1207
       * Windows accesses fail later.
1208
       */
1209
3.68k
      if ((uri->scheme != NULL) &&
1210
0
    (p[0] == '/') &&
1211
0
    (((p[1] >= 'a') && (p[1] <= 'z')) ||
1212
0
     ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1213
0
    (p[2] == ':') &&
1214
0
          (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1215
0
    if (len + 3 >= max) {
1216
0
                    temp = xmlSaveUriRealloc(ret, &max);
1217
0
                    if (temp == NULL) goto mem_error;
1218
0
                    ret = temp;
1219
0
    }
1220
0
    ret[len++] = *p++;
1221
0
    ret[len++] = *p++;
1222
0
    ret[len++] = *p++;
1223
0
      }
1224
292k
      while (*p != 0) {
1225
288k
    if (len + 3 >= max) {
1226
702
                    temp = xmlSaveUriRealloc(ret, &max);
1227
702
                    if (temp == NULL) goto mem_error;
1228
702
                    ret = temp;
1229
702
    }
1230
288k
    if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1231
45.7k
                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1232
24.8k
              ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1233
13.0k
              ((*(p) == ',')))
1234
280k
        ret[len++] = *p++;
1235
8.02k
    else {
1236
8.02k
        int val = *(unsigned char *)p++;
1237
8.02k
        int hi = val / 0x10, lo = val % 0x10;
1238
8.02k
        ret[len++] = '%';
1239
8.02k
        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1240
8.02k
        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1241
8.02k
    }
1242
288k
      }
1243
3.68k
  }
1244
6.38k
  if (uri->query_raw != NULL) {
1245
870
      if (len + 1 >= max) {
1246
35
                temp = xmlSaveUriRealloc(ret, &max);
1247
35
                if (temp == NULL) goto mem_error;
1248
35
                ret = temp;
1249
35
      }
1250
870
      ret[len++] = '?';
1251
870
      p = uri->query_raw;
1252
85.0k
      while (*p != 0) {
1253
84.1k
    if (len + 1 >= max) {
1254
274
                    temp = xmlSaveUriRealloc(ret, &max);
1255
274
                    if (temp == NULL) goto mem_error;
1256
274
                    ret = temp;
1257
274
    }
1258
84.1k
    ret[len++] = *p++;
1259
84.1k
      }
1260
5.51k
  } else if (uri->query != NULL) {
1261
0
      if (len + 3 >= max) {
1262
0
                temp = xmlSaveUriRealloc(ret, &max);
1263
0
                if (temp == NULL) goto mem_error;
1264
0
                ret = temp;
1265
0
      }
1266
0
      ret[len++] = '?';
1267
0
      p = uri->query;
1268
0
      while (*p != 0) {
1269
0
    if (len + 3 >= max) {
1270
0
                    temp = xmlSaveUriRealloc(ret, &max);
1271
0
                    if (temp == NULL) goto mem_error;
1272
0
                    ret = temp;
1273
0
    }
1274
0
    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1275
0
        ret[len++] = *p++;
1276
0
    else {
1277
0
        int val = *(unsigned char *)p++;
1278
0
        int hi = val / 0x10, lo = val % 0x10;
1279
0
        ret[len++] = '%';
1280
0
        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1281
0
        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1282
0
    }
1283
0
      }
1284
0
  }
1285
6.38k
    }
1286
6.38k
    if (uri->fragment != NULL) {
1287
2.04k
  if (len + 3 >= max) {
1288
31
            temp = xmlSaveUriRealloc(ret, &max);
1289
31
            if (temp == NULL) goto mem_error;
1290
31
            ret = temp;
1291
31
  }
1292
2.04k
  ret[len++] = '#';
1293
2.04k
  p = uri->fragment;
1294
401k
  while (*p != 0) {
1295
399k
      if (len + 3 >= max) {
1296
874
                temp = xmlSaveUriRealloc(ret, &max);
1297
874
                if (temp == NULL) goto mem_error;
1298
874
                ret = temp;
1299
874
      }
1300
399k
      if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1301
395k
    ret[len++] = *p++;
1302
3.96k
      else {
1303
3.96k
    int val = *(unsigned char *)p++;
1304
3.96k
    int hi = val / 0x10, lo = val % 0x10;
1305
3.96k
    ret[len++] = '%';
1306
3.96k
    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1307
3.96k
    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1308
3.96k
      }
1309
399k
  }
1310
2.04k
    }
1311
6.38k
    if (len >= max) {
1312
14
        temp = xmlSaveUriRealloc(ret, &max);
1313
14
        if (temp == NULL) goto mem_error;
1314
14
        ret = temp;
1315
14
    }
1316
6.38k
    ret[len] = 0;
1317
6.38k
    return(ret);
1318
1319
0
mem_error:
1320
0
    xmlFree(ret);
1321
0
    return(NULL);
1322
6.38k
}
1323
1324
/**
1325
 * xmlPrintURI:
1326
 * @stream:  a FILE* for the output
1327
 * @uri:  pointer to an xmlURI
1328
 *
1329
 * Prints the URI in the stream @stream.
1330
 */
1331
void
1332
0
xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1333
0
    xmlChar *out;
1334
1335
0
    out = xmlSaveUri(uri);
1336
0
    if (out != NULL) {
1337
0
  fprintf(stream, "%s", (char *) out);
1338
0
  xmlFree(out);
1339
0
    }
1340
0
}
1341
1342
/**
1343
 * xmlCleanURI:
1344
 * @uri:  pointer to an xmlURI
1345
 *
1346
 * Make sure the xmlURI struct is free of content
1347
 */
1348
static void
1349
2.50M
xmlCleanURI(xmlURIPtr uri) {
1350
2.50M
    if (uri == NULL) return;
1351
1352
2.50M
    if (uri->scheme != NULL) xmlFree(uri->scheme);
1353
2.50M
    uri->scheme = NULL;
1354
2.50M
    if (uri->server != NULL) xmlFree(uri->server);
1355
2.50M
    uri->server = NULL;
1356
2.50M
    if (uri->user != NULL) xmlFree(uri->user);
1357
2.50M
    uri->user = NULL;
1358
2.50M
    if (uri->path != NULL) xmlFree(uri->path);
1359
2.50M
    uri->path = NULL;
1360
2.50M
    if (uri->fragment != NULL) xmlFree(uri->fragment);
1361
2.50M
    uri->fragment = NULL;
1362
2.50M
    if (uri->opaque != NULL) xmlFree(uri->opaque);
1363
2.50M
    uri->opaque = NULL;
1364
2.50M
    if (uri->authority != NULL) xmlFree(uri->authority);
1365
2.50M
    uri->authority = NULL;
1366
2.50M
    if (uri->query != NULL) xmlFree(uri->query);
1367
2.50M
    uri->query = NULL;
1368
2.50M
    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1369
2.50M
    uri->query_raw = NULL;
1370
2.50M
}
1371
1372
/**
1373
 * xmlFreeURI:
1374
 * @uri:  pointer to an xmlURI
1375
 *
1376
 * Free up the xmlURI struct
1377
 */
1378
void
1379
880k
xmlFreeURI(xmlURIPtr uri) {
1380
880k
    if (uri == NULL) return;
1381
1382
880k
    if (uri->scheme != NULL) xmlFree(uri->scheme);
1383
880k
    if (uri->server != NULL) xmlFree(uri->server);
1384
880k
    if (uri->user != NULL) xmlFree(uri->user);
1385
880k
    if (uri->path != NULL) xmlFree(uri->path);
1386
880k
    if (uri->fragment != NULL) xmlFree(uri->fragment);
1387
880k
    if (uri->opaque != NULL) xmlFree(uri->opaque);
1388
880k
    if (uri->authority != NULL) xmlFree(uri->authority);
1389
880k
    if (uri->query != NULL) xmlFree(uri->query);
1390
880k
    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1391
880k
    xmlFree(uri);
1392
880k
}
1393
1394
/************************************************************************
1395
 *                  *
1396
 *      Helper functions        *
1397
 *                  *
1398
 ************************************************************************/
1399
1400
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps of RFC 2396 Section 5.2 (steps 6.c
 * through 6.g) to a path string: "." segments and duplicated slashes are
 * dropped, "<segment>/.." pairs are collapsed, and leading "/.." segments
 * of an absolute path are discarded.
 *
 * The string is rewritten in place and can only get shorter; no
 * allocation is done.
 *
 * Returns 0 on success, -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *rd, *wr;

    if (path == NULL)
        return(-1);

    /* Move past the leading slashes to the first non-empty segment. */
    rd = path;
    while (*rd == '/')
        rd++;
    if (*rd == '\0')
        return(0);

    /* Everything before this point is kept verbatim. */
    wr = rd;

    /*
     * First pass, cases (c) and (d): walk the segments with a read and a
     * write pointer.
     */
    while (*rd != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path
         *    segment, are removed from the buffer string.
         */
        if ((rd[0] == '.') && (rd[1] == '/')) {
            rd += 2;
            /* swallow the slashes that directly follow as well */
            while (*rd == '/')
                rd++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path
         *    segment, that "." is removed.
         */
        if ((rd[0] == '.') && (rd[1] == '\0'))
            break;

        /* Any other segment is copied up to its separator. */
        while (*rd != '/') {
            if (*rd == '\0')
                goto done_cd;
            *wr++ = *rd++;
        }
        /* normalize "//" down to a single separator */
        while ((rd[0] == '/') && (rd[1] == '/'))
            rd++;

        *wr++ = *rd++;
    }
 done_cd:
    *wr = '\0';

    /* Restart from the first segment for the second pass. */
    rd = path;
    while (*rd == '/')
        rd++;
    if (*rd == '\0')
        return(0);

    /*
     * Second pass, cases (e) and (f):
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string, leftmost match first, until none remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * The buffer is compacted each time a pair is removed, so a single
     * "current segment" pointer is enough.
     */
    for (;;) {
        char *next, *dst;

        /* rd is the first character of the segment under examination;
         * locate where that segment ends. */
        next = rd;
        while ((*next != '/') && (*next != '\0'))
            next++;

        /* A last segment cannot be followed by "..": finished. */
        if (*next == '\0')
            break;

        /* Step over the separator to the following segment. */
        next++;

        /* Keep this segment if it is ".." itself, or if the following
         * segment is not exactly "..". */
        if (((rd[0] == '.') && (rd[1] == '.') && (next == rd + 3))
            || (next[0] != '.') || (next[1] != '.')
            || ((next[2] != '/') && (next[2] != '\0'))) {
            rd = next;
            continue;
        }

        /* "<segment>/.." ends the buffer: cut right here and stop. */
        if (next[2] == '\0') {
            rd[0] = '\0';
            break;
        }

        /* Source and destination overlap, so copy byte by byte rather
         * than with strcpy. */
        dst = rd;
        next += 3;
        while ((*dst++ = *next++) != 0)
            ;

        /* Look backwards for a previous segment; when there is none,
         * simply go on from the current position. */
        next = rd;
        while ((next > path) && ((--next)[0] == '/'))
            ;
        if (next == path)
            continue;

        /*
         * next sits on the last character of the previous segment.  Back
         * up to its first character and re-examine it, so that inputs
         * like "foo/bar/../.." also lose the "foo/.." pair.
         */
        rd = next;
        while ((rd > path) && (rd[-1] != '/'))
            --rd;
    }
    wr[0] = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error.  Of the handling options the RFC
     *    allows, the one used here is to discard those segments.
     */
    if (path[0] == '/') {
        rd = path;
        while ((rd[0] == '/') && (rd[1] == '.') && (rd[2] == '.')
               && ((rd[3] == '/') || (rd[3] == '\0')))
            rd += 3;

        if (rd != path) {
            wr = path;
            while (*rd != '\0')
                *wr++ = *rd++;
            *wr = 0;
        }
    }

    return(0);
}
1588
1589
438k
/* Returns 1 if c is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0. */
static int is_hex(char c) {
    int decimal = (c >= '0') && (c <= '9');
    int lower = (c >= 'a') && (c <= 'f');
    int upper = (c >= 'A') && (c <= 'F');

    return((decimal || lower || upper) ? 1 : 0);
}
1596
1597
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI.
 * Every "%XX" sequence made of two hexadecimal digits is replaced by the
 * byte it encodes (no encoding conversion); anything else, including a
 * '%' not followed by two hex digits, is copied unchanged.  The result is
 * therefore never longer than the input.
 *
 * When @target is NULL a buffer of @len + 1 bytes is allocated for the
 * result, otherwise the result is written to @target.
 *
 * Returns the unescaped, NUL terminated string (a new allocation or
 *         @target), or NULL in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    const char *in;
    char *ret, *out;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len < 0) return(NULL);

    ret = target;
    if (ret == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    in = str;
    out = ret;
    while (len > 0) {
        if ((len > 2) && (in[0] == '%') && (is_hex(in[1])) &&
            (is_hex(in[2]))) {
            int value = 0;
            int pos;

            /* Fold the two hex digits, most significant first. */
            for (pos = 1; pos <= 2; pos++) {
                char digit = in[pos];

                if ((digit >= '0') && (digit <= '9'))
                    value = value * 16 + (digit - '0');
                else if ((digit >= 'a') && (digit <= 'f'))
                    value = value * 16 + (digit - 'a') + 10;
                else if ((digit >= 'A') && (digit <= 'F'))
                    value = value * 16 + (digit - 'A') + 10;
            }
            *out++ = value;
            in += 3;
            len -= 3;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1658
1659
/**
1660
 * xmlURIEscapeStr:
1661
 * @str:  string to escape
1662
 * @list: exception list string of chars not to escape
1663
 *
1664
 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1665
 * and the characters in the exception list.
1666
 *
1667
 * Returns a new escaped string or NULL in case of error.
1668
 */
1669
xmlChar *
1670
0
xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1671
0
    xmlChar *ret, ch;
1672
0
    xmlChar *temp;
1673
0
    const xmlChar *in;
1674
0
    int len, out;
1675
1676
0
    if (str == NULL)
1677
0
  return(NULL);
1678
0
    if (str[0] == 0)
1679
0
  return(xmlStrdup(str));
1680
0
    len = xmlStrlen(str);
1681
0
    if (!(len > 0)) return(NULL);
1682
1683
0
    len += 20;
1684
0
    ret = (xmlChar *) xmlMallocAtomic(len);
1685
0
    if (ret == NULL) {
1686
0
        xmlURIErrMemory("escaping URI value\n");
1687
0
  return(NULL);
1688
0
    }
1689
0
    in = (const xmlChar *) str;
1690
0
    out = 0;
1691
0
    while(*in != 0) {
1692
0
  if (len - out <= 3) {
1693
0
            temp = xmlSaveUriRealloc(ret, &len);
1694
0
      if (temp == NULL) {
1695
0
                xmlURIErrMemory("escaping URI value\n");
1696
0
    xmlFree(ret);
1697
0
    return(NULL);
1698
0
      }
1699
0
      ret = temp;
1700
0
  }
1701
1702
0
  ch = *in;
1703
1704
0
  if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1705
0
      unsigned char val;
1706
0
      ret[out++] = '%';
1707
0
      val = ch >> 4;
1708
0
      if (val <= 9)
1709
0
    ret[out++] = '0' + val;
1710
0
      else
1711
0
    ret[out++] = 'A' + val - 0xA;
1712
0
      val = ch & 0xF;
1713
0
      if (val <= 9)
1714
0
    ret[out++] = '0' + val;
1715
0
      else
1716
0
    ret[out++] = 'A' + val - 0xA;
1717
0
      in++;
1718
0
  } else {
1719
0
      ret[out++] = *in++;
1720
0
  }
1721
1722
0
    }
1723
0
    ret[out] = 0;
1724
0
    return(ret);
1725
0
}
1726
1727
/**
1728
 * xmlURIEscape:
1729
 * @str:  the string of the URI to escape
1730
 *
1731
 * Escaping routine, does not do validity checks !
1732
 * It will try to escape the chars needing this, but this is heuristic
1733
 * based it's impossible to be sure.
1734
 *
1735
 * Returns an copy of the string, but escaped
1736
 *
1737
 * 25 May 2001
1738
 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1739
 * according to RFC2396.
1740
 *   - Carl Douglas
1741
 */
1742
xmlChar *
1743
xmlURIEscape(const xmlChar * str)
1744
0
{
1745
0
    xmlChar *ret, *segment = NULL;
1746
0
    xmlURIPtr uri;
1747
0
    int ret2;
1748
1749
0
#define NULLCHK(p) if(!p) { \
1750
0
         xmlURIErrMemory("escaping URI value\n"); \
1751
0
         xmlFreeURI(uri); \
1752
0
         return NULL; } \
1753
0
1754
0
    if (str == NULL)
1755
0
        return (NULL);
1756
1757
0
    uri = xmlCreateURI();
1758
0
    if (uri != NULL) {
1759
  /*
1760
   * Allow escaping errors in the unescaped form
1761
   */
1762
0
        uri->cleanup = 1;
1763
0
        ret2 = xmlParseURIReference(uri, (const char *)str);
1764
0
        if (ret2) {
1765
0
            xmlFreeURI(uri);
1766
0
            return (NULL);
1767
0
        }
1768
0
    }
1769
1770
0
    if (!uri)
1771
0
        return NULL;
1772
1773
0
    ret = NULL;
1774
1775
0
    if (uri->scheme) {
1776
0
        segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1777
0
        NULLCHK(segment)
1778
0
        ret = xmlStrcat(ret, segment);
1779
0
        ret = xmlStrcat(ret, BAD_CAST ":");
1780
0
        xmlFree(segment);
1781
0
    }
1782
1783
0
    if (uri->authority) {
1784
0
        segment =
1785
0
            xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1786
0
        NULLCHK(segment)
1787
0
        ret = xmlStrcat(ret, BAD_CAST "//");
1788
0
        ret = xmlStrcat(ret, segment);
1789
0
        xmlFree(segment);
1790
0
    }
1791
1792
0
    if (uri->user) {
1793
0
        segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1794
0
        NULLCHK(segment)
1795
0
    ret = xmlStrcat(ret,BAD_CAST "//");
1796
0
        ret = xmlStrcat(ret, segment);
1797
0
        ret = xmlStrcat(ret, BAD_CAST "@");
1798
0
        xmlFree(segment);
1799
0
    }
1800
1801
0
    if (uri->server) {
1802
0
        segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1803
0
        NULLCHK(segment)
1804
0
    if (uri->user == NULL)
1805
0
    ret = xmlStrcat(ret, BAD_CAST "//");
1806
0
        ret = xmlStrcat(ret, segment);
1807
0
        xmlFree(segment);
1808
0
    }
1809
1810
0
    if (uri->port) {
1811
0
        xmlChar port[10];
1812
1813
0
        snprintf((char *) port, 10, "%d", uri->port);
1814
0
        ret = xmlStrcat(ret, BAD_CAST ":");
1815
0
        ret = xmlStrcat(ret, port);
1816
0
    }
1817
1818
0
    if (uri->path) {
1819
0
        segment =
1820
0
            xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1821
0
        NULLCHK(segment)
1822
0
        ret = xmlStrcat(ret, segment);
1823
0
        xmlFree(segment);
1824
0
    }
1825
1826
0
    if (uri->query_raw) {
1827
0
        ret = xmlStrcat(ret, BAD_CAST "?");
1828
0
        ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1829
0
    }
1830
0
    else if (uri->query) {
1831
0
        segment =
1832
0
            xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1833
0
        NULLCHK(segment)
1834
0
        ret = xmlStrcat(ret, BAD_CAST "?");
1835
0
        ret = xmlStrcat(ret, segment);
1836
0
        xmlFree(segment);
1837
0
    }
1838
1839
0
    if (uri->opaque) {
1840
0
        segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1841
0
        NULLCHK(segment)
1842
0
        ret = xmlStrcat(ret, segment);
1843
0
        xmlFree(segment);
1844
0
    }
1845
1846
0
    if (uri->fragment) {
1847
0
        segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1848
0
        NULLCHK(segment)
1849
0
        ret = xmlStrcat(ret, BAD_CAST "#");
1850
0
        ret = xmlStrcat(ret, segment);
1851
0
        xmlFree(segment);
1852
0
    }
1853
1854
0
    xmlFreeURI(uri);
1855
0
#undef NULLCHK
1856
1857
0
    return (ret);
1858
0
}
1859
1860
/************************************************************************
1861
 *                  *
1862
 *      Public functions        *
1863
 *                  *
1864
 ************************************************************************/
1865
1866
/**
1867
 * xmlBuildURI:
1868
 * @URI:  the URI instance found in the document
1869
 * @base:  the base value
1870
 *
1871
 * Computes he final URI of the reference done by checking that
1872
 * the given URI is valid, and building the final URI using the
1873
 * base URI. This is processed according to section 5.2 of the
1874
 * RFC 2396
1875
 *
1876
 * 5.2. Resolving Relative References to Absolute Form
1877
 *
1878
 * Returns a new URI string (to be freed by the caller) or NULL in case
1879
 *         of error.
1880
 */
1881
xmlChar *
1882
8.11k
xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1883
8.11k
    xmlChar *val = NULL;
1884
8.11k
    int ret, len, indx, cur, out;
1885
8.11k
    xmlURIPtr ref = NULL;
1886
8.11k
    xmlURIPtr bas = NULL;
1887
8.11k
    xmlURIPtr res = NULL;
1888
1889
    /*
1890
     * 1) The URI reference is parsed into the potential four components and
1891
     *    fragment identifier, as described in Section 4.3.
1892
     *
1893
     *    NOTE that a completely empty URI is treated by modern browsers
1894
     *    as a reference to "." rather than as a synonym for the current
1895
     *    URI.  Should we do that here?
1896
     */
1897
8.11k
    if (URI == NULL)
1898
0
  ret = -1;
1899
8.11k
    else {
1900
8.11k
  if (*URI) {
1901
7.37k
      ref = xmlCreateURI();
1902
7.37k
      if (ref == NULL)
1903
0
    goto done;
1904
7.37k
      ret = xmlParseURIReference(ref, (const char *) URI);
1905
7.37k
  }
1906
741
  else
1907
741
      ret = 0;
1908
8.11k
    }
1909
8.11k
    if (ret != 0)
1910
1.49k
  goto done;
1911
6.62k
    if ((ref != NULL) && (ref->scheme != NULL)) {
1912
  /*
1913
   * The URI is absolute don't modify.
1914
   */
1915
22
  val = xmlStrdup(URI);
1916
22
  goto done;
1917
22
    }
1918
6.60k
    if (base == NULL)
1919
1.89k
  ret = -1;
1920
4.70k
    else {
1921
4.70k
  bas = xmlCreateURI();
1922
4.70k
  if (bas == NULL)
1923
0
      goto done;
1924
4.70k
  ret = xmlParseURIReference(bas, (const char *) base);
1925
4.70k
    }
1926
6.60k
    if (ret != 0) {
1927
1.89k
  if (ref)
1928
1.67k
      val = xmlSaveUri(ref);
1929
1.89k
  goto done;
1930
1.89k
    }
1931
4.70k
    if (ref == NULL) {
1932
  /*
1933
   * the base fragment must be ignored
1934
   */
1935
521
  if (bas->fragment != NULL) {
1936
0
      xmlFree(bas->fragment);
1937
0
      bas->fragment = NULL;
1938
0
  }
1939
521
  val = xmlSaveUri(bas);
1940
521
  goto done;
1941
521
    }
1942
1943
    /*
1944
     * 2) If the path component is empty and the scheme, authority, and
1945
     *    query components are undefined, then it is a reference to the
1946
     *    current document and we are done.  Otherwise, the reference URI's
1947
     *    query and fragment components are defined as found (or not found)
1948
     *    within the URI reference and not inherited from the base URI.
1949
     *
1950
     *    NOTE that in modern browsers, the parsing differs from the above
1951
     *    in the following aspect:  the query component is allowed to be
1952
     *    defined while still treating this as a reference to the current
1953
     *    document.
1954
     */
1955
4.18k
    res = xmlCreateURI();
1956
4.18k
    if (res == NULL)
1957
0
  goto done;
1958
4.18k
    if ((ref->scheme == NULL) && (ref->path == NULL) &&
1959
1.51k
  ((ref->authority == NULL) && (ref->server == NULL))) {
1960
292
  if (bas->scheme != NULL)
1961
0
      res->scheme = xmlMemStrdup(bas->scheme);
1962
292
  if (bas->authority != NULL)
1963
0
      res->authority = xmlMemStrdup(bas->authority);
1964
292
  else if ((bas->server != NULL) || (bas->port == -1)) {
1965
0
      if (bas->server != NULL)
1966
0
    res->server = xmlMemStrdup(bas->server);
1967
0
      if (bas->user != NULL)
1968
0
    res->user = xmlMemStrdup(bas->user);
1969
0
      res->port = bas->port;
1970
0
  }
1971
292
  if (bas->path != NULL)
1972
44
      res->path = xmlMemStrdup(bas->path);
1973
292
  if (ref->query_raw != NULL)
1974
72
      res->query_raw = xmlMemStrdup (ref->query_raw);
1975
220
  else if (ref->query != NULL)
1976
0
      res->query = xmlMemStrdup(ref->query);
1977
220
  else if (bas->query_raw != NULL)
1978
0
      res->query_raw = xmlMemStrdup(bas->query_raw);
1979
220
  else if (bas->query != NULL)
1980
0
      res->query = xmlMemStrdup(bas->query);
1981
292
  if (ref->fragment != NULL)
1982
225
      res->fragment = xmlMemStrdup(ref->fragment);
1983
292
  goto step_7;
1984
292
    }
1985
1986
    /*
1987
     * 3) If the scheme component is defined, indicating that the reference
1988
     *    starts with a scheme name, then the reference is interpreted as an
1989
     *    absolute URI and we are done.  Otherwise, the reference URI's
1990
     *    scheme is inherited from the base URI's scheme component.
1991
     */
1992
3.88k
    if (ref->scheme != NULL) {
1993
0
  val = xmlSaveUri(ref);
1994
0
  goto done;
1995
0
    }
1996
3.88k
    if (bas->scheme != NULL)
1997
0
  res->scheme = xmlMemStrdup(bas->scheme);
1998
1999
3.88k
    if (ref->query_raw != NULL)
2000
532
  res->query_raw = xmlMemStrdup(ref->query_raw);
2001
3.35k
    else if (ref->query != NULL)
2002
0
  res->query = xmlMemStrdup(ref->query);
2003
3.88k
    if (ref->fragment != NULL)
2004
1.18k
  res->fragment = xmlMemStrdup(ref->fragment);
2005
2006
    /*
2007
     * 4) If the authority component is defined, then the reference is a
2008
     *    network-path and we skip to step 7.  Otherwise, the reference
2009
     *    URI's authority is inherited from the base URI's authority
2010
     *    component, which will also be undefined if the URI scheme does not
2011
     *    use an authority component.
2012
     */
2013
3.88k
    if ((ref->authority != NULL) || (ref->server != NULL)) {
2014
1.54k
  if (ref->authority != NULL)
2015
0
      res->authority = xmlMemStrdup(ref->authority);
2016
1.54k
  else {
2017
1.54k
      res->server = xmlMemStrdup(ref->server);
2018
1.54k
      if (ref->user != NULL)
2019
889
    res->user = xmlMemStrdup(ref->user);
2020
1.54k
            res->port = ref->port;
2021
1.54k
  }
2022
1.54k
  if (ref->path != NULL)
2023
322
      res->path = xmlMemStrdup(ref->path);
2024
1.54k
  goto step_7;
2025
1.54k
    }
2026
2.34k
    if (bas->authority != NULL)
2027
0
  res->authority = xmlMemStrdup(bas->authority);
2028
2.34k
    else if ((bas->server != NULL) || (bas->port == -1)) {
2029
0
  if (bas->server != NULL)
2030
0
      res->server = xmlMemStrdup(bas->server);
2031
0
  if (bas->user != NULL)
2032
0
      res->user = xmlMemStrdup(bas->user);
2033
0
  res->port = bas->port;
2034
0
    }
2035
2036
    /*
2037
     * 5) If the path component begins with a slash character ("/"), then
2038
     *    the reference is an absolute-path and we skip to step 7.
2039
     */
2040
2.34k
    if ((ref->path != NULL) && (ref->path[0] == '/')) {
2041
365
  res->path = xmlMemStrdup(ref->path);
2042
365
  goto step_7;
2043
365
    }
2044
2045
2046
    /*
2047
     * 6) If this step is reached, then we are resolving a relative-path
2048
     *    reference.  The relative path needs to be merged with the base
2049
     *    URI's path.  Although there are many ways to do this, we will
2050
     *    describe a simple method using a separate string buffer.
2051
     *
2052
     * Allocate a buffer large enough for the result string.
2053
     */
2054
1.98k
    len = 2; /* extra / and 0 */
2055
1.98k
    if (ref->path != NULL)
2056
1.98k
  len += strlen(ref->path);
2057
1.98k
    if (bas->path != NULL)
2058
148
  len += strlen(bas->path);
2059
1.98k
    res->path = (char *) xmlMallocAtomic(len);
2060
1.98k
    if (res->path == NULL) {
2061
0
        xmlURIErrMemory("resolving URI against base\n");
2062
0
  goto done;
2063
0
    }
2064
1.98k
    res->path[0] = 0;
2065
2066
    /*
2067
     * a) All but the last segment of the base URI's path component is
2068
     *    copied to the buffer.  In other words, any characters after the
2069
     *    last (right-most) slash character, if any, are excluded.
2070
     */
2071
1.98k
    cur = 0;
2072
1.98k
    out = 0;
2073
1.98k
    if (bas->path != NULL) {
2074
592
  while (bas->path[cur] != 0) {
2075
6.06k
      while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2076
5.47k
    cur++;
2077
592
      if (bas->path[cur] == 0)
2078
148
    break;
2079
2080
444
      cur++;
2081
2.66k
      while (out < cur) {
2082
2.22k
    res->path[out] = bas->path[out];
2083
2.22k
    out++;
2084
2.22k
      }
2085
444
  }
2086
148
    }
2087
1.98k
    res->path[out] = 0;
2088
2089
    /*
2090
     * b) The reference's path component is appended to the buffer
2091
     *    string.
2092
     */
2093
1.98k
    if (ref->path != NULL && ref->path[0] != 0) {
2094
1.97k
  indx = 0;
2095
  /*
2096
   * Ensure the path includes a '/'
2097
   */
2098
1.97k
  if ((out == 0) && (bas->server != NULL))
2099
0
      res->path[out++] = '/';
2100
107k
  while (ref->path[indx] != 0) {
2101
105k
      res->path[out++] = ref->path[indx++];
2102
105k
  }
2103
1.97k
    }
2104
1.98k
    res->path[out] = 0;
2105
2106
    /*
2107
     * Steps c) to h) are really path normalization steps
2108
     */
2109
1.98k
    xmlNormalizeURIPath(res->path);
2110
2111
4.18k
step_7:
2112
2113
    /*
2114
     * 7) The resulting URI components, including any inherited from the
2115
     *    base URI, are recombined to give the absolute form of the URI
2116
     *    reference.
2117
     */
2118
4.18k
    val = xmlSaveUri(res);
2119
2120
8.11k
done:
2121
8.11k
    if (ref != NULL)
2122
7.37k
  xmlFreeURI(ref);
2123
8.11k
    if (bas != NULL)
2124
4.70k
  xmlFreeURI(bas);
2125
8.11k
    if (res != NULL)
2126
4.18k
  xmlFreeURI(res);
2127
8.11k
    return(val);
2128
4.18k
}
2129
2130
/**
2131
 * xmlBuildRelativeURI:
2132
 * @URI:  the URI reference under consideration
2133
 * @base:  the base value
2134
 *
2135
 * Expresses the URI of the reference in terms relative to the
2136
 * base.  Some examples of this operation include:
2137
 *     base = "http://site1.com/docs/book1.html"
2138
 *        URI input                        URI returned
2139
 *     docs/pic1.gif                    pic1.gif
2140
 *     docs/img/pic1.gif                img/pic1.gif
2141
 *     img/pic1.gif                     ../img/pic1.gif
2142
 *     http://site1.com/docs/pic1.gif   pic1.gif
2143
 *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2144
 *
2145
 *     base = "docs/book1.html"
2146
 *        URI input                        URI returned
2147
 *     docs/pic1.gif                    pic1.gif
2148
 *     docs/img/pic1.gif                img/pic1.gif
2149
 *     img/pic1.gif                     ../img/pic1.gif
2150
 *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2151
 *
2152
 *
2153
 * Note: if the URI reference is really wierd or complicated, it may be
2154
 *       worthwhile to first convert it into a "nice" one by calling
2155
 *       xmlBuildURI (using 'base') before calling this routine,
2156
 *       since this routine (for reasonable efficiency) assumes URI has
2157
 *       already been through some validation.
2158
 *
2159
 * Returns a new URI string (to be freed by the caller) or NULL in case
2160
 * error.
2161
 */
2162
xmlChar *
2163
xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2164
0
{
2165
0
    xmlChar *val = NULL;
2166
0
    int ret;
2167
0
    int ix;
2168
0
    int nbslash = 0;
2169
0
    int len;
2170
0
    xmlURIPtr ref = NULL;
2171
0
    xmlURIPtr bas = NULL;
2172
0
    xmlChar *bptr, *uptr, *vptr;
2173
0
    int remove_path = 0;
2174
2175
0
    if ((URI == NULL) || (*URI == 0))
2176
0
  return NULL;
2177
2178
    /*
2179
     * First parse URI into a standard form
2180
     */
2181
0
    ref = xmlCreateURI ();
2182
0
    if (ref == NULL)
2183
0
  return NULL;
2184
    /* If URI not already in "relative" form */
2185
0
    if (URI[0] != '.') {
2186
0
  ret = xmlParseURIReference (ref, (const char *) URI);
2187
0
  if (ret != 0)
2188
0
      goto done;   /* Error in URI, return NULL */
2189
0
    } else
2190
0
  ref->path = (char *)xmlStrdup(URI);
2191
2192
    /*
2193
     * Next parse base into the same standard form
2194
     */
2195
0
    if ((base == NULL) || (*base == 0)) {
2196
0
  val = xmlStrdup (URI);
2197
0
  goto done;
2198
0
    }
2199
0
    bas = xmlCreateURI ();
2200
0
    if (bas == NULL)
2201
0
  goto done;
2202
0
    if (base[0] != '.') {
2203
0
  ret = xmlParseURIReference (bas, (const char *) base);
2204
0
  if (ret != 0)
2205
0
      goto done;   /* Error in base, return NULL */
2206
0
    } else
2207
0
  bas->path = (char *)xmlStrdup(base);
2208
2209
    /*
2210
     * If the scheme / server on the URI differs from the base,
2211
     * just return the URI
2212
     */
2213
0
    if ((ref->scheme != NULL) &&
2214
0
  ((bas->scheme == NULL) ||
2215
0
   (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2216
0
   (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2217
0
  val = xmlStrdup (URI);
2218
0
  goto done;
2219
0
    }
2220
0
    if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2221
0
  val = xmlStrdup(BAD_CAST "");
2222
0
  goto done;
2223
0
    }
2224
0
    if (bas->path == NULL) {
2225
0
  val = xmlStrdup((xmlChar *)ref->path);
2226
0
  goto done;
2227
0
    }
2228
0
    if (ref->path == NULL) {
2229
0
        ref->path = (char *) "/";
2230
0
  remove_path = 1;
2231
0
    }
2232
2233
    /*
2234
     * At this point (at last!) we can compare the two paths
2235
     *
2236
     * First we take care of the special case where either of the
2237
     * two path components may be missing (bug 316224)
2238
     */
2239
0
    if (bas->path == NULL) {
2240
0
  if (ref->path != NULL) {
2241
0
      uptr = (xmlChar *) ref->path;
2242
0
      if (*uptr == '/')
2243
0
    uptr++;
2244
      /* exception characters from xmlSaveUri */
2245
0
      val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2246
0
  }
2247
0
  goto done;
2248
0
    }
2249
0
    bptr = (xmlChar *)bas->path;
2250
0
    if (ref->path == NULL) {
2251
0
  for (ix = 0; bptr[ix] != 0; ix++) {
2252
0
      if (bptr[ix] == '/')
2253
0
    nbslash++;
2254
0
  }
2255
0
  uptr = NULL;
2256
0
  len = 1;  /* this is for a string terminator only */
2257
0
    } else {
2258
0
        xmlChar *rptr = (xmlChar *) ref->path;
2259
0
        int pos = 0;
2260
2261
        /*
2262
         * Next we compare the two strings and find where they first differ
2263
         */
2264
0
  if ((*rptr == '.') && (rptr[1] == '/'))
2265
0
            rptr += 2;
2266
0
  if ((*bptr == '.') && (bptr[1] == '/'))
2267
0
            bptr += 2;
2268
0
  else if ((*bptr == '/') && (*rptr != '/'))
2269
0
      bptr++;
2270
0
  while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2271
0
      pos++;
2272
2273
0
  if (bptr[pos] == rptr[pos]) {
2274
0
      val = xmlStrdup(BAD_CAST "");
2275
0
      goto done;    /* (I can't imagine why anyone would do this) */
2276
0
  }
2277
2278
  /*
2279
   * In URI, "back up" to the last '/' encountered.  This will be the
2280
   * beginning of the "unique" suffix of URI
2281
   */
2282
0
  ix = pos;
2283
0
  if ((rptr[ix] == '/') && (ix > 0))
2284
0
      ix--;
2285
0
  else if ((rptr[ix] == 0) && (ix > 1) && (rptr[ix - 1] == '/'))
2286
0
      ix -= 2;
2287
0
  for (; ix > 0; ix--) {
2288
0
      if (rptr[ix] == '/')
2289
0
    break;
2290
0
  }
2291
0
  if (ix == 0) {
2292
0
      uptr = (xmlChar *)rptr;
2293
0
  } else {
2294
0
      ix++;
2295
0
      uptr = (xmlChar *)&rptr[ix];
2296
0
  }
2297
2298
  /*
2299
   * In base, count the number of '/' from the differing point
2300
   */
2301
0
  if (bptr[pos] != rptr[pos]) {/* check for trivial URI == base */
2302
0
      for (; bptr[ix] != 0; ix++) {
2303
0
    if (bptr[ix] == '/')
2304
0
        nbslash++;
2305
0
      }
2306
0
  }
2307
0
  len = xmlStrlen (uptr) + 1;
2308
0
    }
2309
2310
0
    if (nbslash == 0) {
2311
0
  if (uptr != NULL)
2312
      /* exception characters from xmlSaveUri */
2313
0
      val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2314
0
  goto done;
2315
0
    }
2316
2317
    /*
2318
     * Allocate just enough space for the returned string -
2319
     * length of the remainder of the URI, plus enough space
2320
     * for the "../" groups, plus one for the terminator
2321
     */
2322
0
    val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2323
0
    if (val == NULL) {
2324
0
        xmlURIErrMemory("building relative URI\n");
2325
0
  goto done;
2326
0
    }
2327
0
    vptr = val;
2328
    /*
2329
     * Put in as many "../" as needed
2330
     */
2331
0
    for (; nbslash>0; nbslash--) {
2332
0
  *vptr++ = '.';
2333
0
  *vptr++ = '.';
2334
0
  *vptr++ = '/';
2335
0
    }
2336
    /*
2337
     * Finish up with the end of the URI
2338
     */
2339
0
    if (uptr != NULL) {
2340
0
        if ((vptr > val) && (len > 0) &&
2341
0
      (uptr[0] == '/') && (vptr[-1] == '/')) {
2342
0
      memcpy (vptr, uptr + 1, len - 1);
2343
0
      vptr[len - 2] = 0;
2344
0
  } else {
2345
0
      memcpy (vptr, uptr, len);
2346
0
      vptr[len - 1] = 0;
2347
0
  }
2348
0
    } else {
2349
0
  vptr[len - 1] = 0;
2350
0
    }
2351
2352
    /* escape the freshly-built path */
2353
0
    vptr = val;
2354
  /* exception characters from xmlSaveUri */
2355
0
    val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2356
0
    xmlFree(vptr);
2357
2358
0
done:
2359
    /*
2360
     * Free the working variables
2361
     */
2362
0
    if (remove_path != 0)
2363
0
        ref->path = NULL;
2364
0
    if (ref != NULL)
2365
0
  xmlFreeURI (ref);
2366
0
    if (bas != NULL)
2367
0
  xmlFreeURI (bas);
2368
2369
0
    return val;
2370
0
}
2371
2372
/**
2373
 * xmlCanonicPath:
2374
 * @path:  the resource locator in a filesystem notation
2375
 *
2376
 * Constructs a canonic path from the specified path.
2377
 *
2378
 * Returns a new canonic path, or a duplicate of the path parameter if the
2379
 * construction fails. The caller is responsible for freeing the memory occupied
2380
 * by the returned string. If there is insufficient memory available, or the
2381
 * argument is NULL, the function returns NULL.
2382
 */
2383
#define IS_WINDOWS_PATH(p)          \
2384
  ((p != NULL) &&           \
2385
   (((p[0] >= 'a') && (p[0] <= 'z')) ||     \
2386
    ((p[0] >= 'A') && (p[0] <= 'Z'))) &&      \
2387
   (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2388
xmlChar *
2389
xmlCanonicPath(const xmlChar *path)
2390
92.4k
{
2391
/*
2392
 * For Windows implementations, additional work needs to be done to
2393
 * replace backslashes in pathnames with "forward slashes"
2394
 */
2395
#if defined(_WIN32) && !defined(__CYGWIN__)
2396
    int len = 0;
2397
    char *p = NULL;
2398
#endif
2399
92.4k
    xmlURIPtr uri;
2400
92.4k
    xmlChar *ret;
2401
92.4k
    const xmlChar *absuri;
2402
2403
92.4k
    if (path == NULL)
2404
0
  return(NULL);
2405
2406
#if defined(_WIN32)
2407
    /*
2408
     * We must not change the backslashes to slashes if the the path
2409
     * starts with \\?\
2410
     * Those paths can be up to 32k characters long.
2411
     * Was added specifically for OpenOffice, those paths can't be converted
2412
     * to URIs anyway.
2413
     */
2414
    if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2415
        (path[3] == '\\') )
2416
  return xmlStrdup((const xmlChar *) path);
2417
#endif
2418
2419
  /* sanitize filename starting with // so it can be used as URI */
2420
92.4k
    if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2421
0
        path++;
2422
2423
92.4k
    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2424
92.4k
  xmlFreeURI(uri);
2425
92.4k
  return xmlStrdup(path);
2426
92.4k
    }
2427
2428
    /* Check if this is an "absolute uri" */
2429
0
    absuri = xmlStrstr(path, BAD_CAST "://");
2430
0
    if (absuri != NULL) {
2431
0
        int l, j;
2432
0
  unsigned char c;
2433
0
  xmlChar *escURI;
2434
2435
        /*
2436
   * this looks like an URI where some parts have not been
2437
   * escaped leading to a parsing problem.  Check that the first
2438
   * part matches a protocol.
2439
   */
2440
0
  l = absuri - path;
2441
  /* Bypass if first part (part before the '://') is > 20 chars */
2442
0
  if ((l <= 0) || (l > 20))
2443
0
      goto path_processing;
2444
  /* Bypass if any non-alpha characters are present in first part */
2445
0
  for (j = 0;j < l;j++) {
2446
0
      c = path[j];
2447
0
      if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2448
0
          goto path_processing;
2449
0
  }
2450
2451
  /* Escape all except the characters specified in the supplied path */
2452
0
        escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2453
0
  if (escURI != NULL) {
2454
      /* Try parsing the escaped path */
2455
0
      uri = xmlParseURI((const char *) escURI);
2456
      /* If successful, return the escaped string */
2457
0
      if (uri != NULL) {
2458
0
          xmlFreeURI(uri);
2459
0
    return escURI;
2460
0
      }
2461
0
            xmlFree(escURI);
2462
0
  }
2463
0
    }
2464
2465
0
path_processing:
2466
/* For Windows implementations, replace backslashes with 'forward slashes' */
2467
#if defined(_WIN32) && !defined(__CYGWIN__)
2468
    /*
2469
     * Create a URI structure
2470
     */
2471
    uri = xmlCreateURI();
2472
    if (uri == NULL) {    /* Guard against 'out of memory' */
2473
        return(NULL);
2474
    }
2475
2476
    len = xmlStrlen(path);
2477
    if ((len > 2) && IS_WINDOWS_PATH(path)) {
2478
        /* make the scheme 'file' */
2479
  uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2480
  /* allocate space for leading '/' + path + string terminator */
2481
  uri->path = xmlMallocAtomic(len + 2);
2482
  if (uri->path == NULL) {
2483
      xmlFreeURI(uri);  /* Guard agains 'out of memory' */
2484
      return(NULL);
2485
  }
2486
  /* Put in leading '/' plus path */
2487
  uri->path[0] = '/';
2488
  p = uri->path + 1;
2489
  strncpy(p, (char *) path, len + 1);
2490
    } else {
2491
  uri->path = (char *) xmlStrdup(path);
2492
  if (uri->path == NULL) {
2493
      xmlFreeURI(uri);
2494
      return(NULL);
2495
  }
2496
  p = uri->path;
2497
    }
2498
    /* Now change all occurences of '\' to '/' */
2499
    while (*p != '\0') {
2500
  if (*p == '\\')
2501
      *p = '/';
2502
  p++;
2503
    }
2504
2505
    if (uri->scheme == NULL) {
2506
  ret = xmlStrdup((const xmlChar *) uri->path);
2507
    } else {
2508
  ret = xmlSaveUri(uri);
2509
    }
2510
2511
    xmlFreeURI(uri);
2512
#else
2513
0
    ret = xmlStrdup((const xmlChar *) path);
2514
0
#endif
2515
0
    return(ret);
2516
0
}
2517
2518
/**
2519
 * xmlPathToURI:
2520
 * @path:  the resource locator in a filesystem notation
2521
 *
2522
 * Constructs an URI expressing the existing path
2523
 *
2524
 * Returns a new URI, or a duplicate of the path parameter if the
2525
 * construction fails. The caller is responsible for freeing the memory
2526
 * occupied by the returned string. If there is insufficient memory available,
2527
 * or the argument is NULL, the function returns NULL.
2528
 */
2529
xmlChar *
2530
xmlPathToURI(const xmlChar *path)
2531
90.2k
{
2532
90.2k
    xmlURIPtr uri;
2533
90.2k
    xmlURI temp;
2534
90.2k
    xmlChar *ret, *cal;
2535
2536
90.2k
    if (path == NULL)
2537
0
        return(NULL);
2538
2539
90.2k
    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2540
90.2k
  xmlFreeURI(uri);
2541
90.2k
  return xmlStrdup(path);
2542
90.2k
    }
2543
0
    cal = xmlCanonicPath(path);
2544
0
    if (cal == NULL)
2545
0
        return(NULL);
2546
#if defined(_WIN32) && !defined(__CYGWIN__)
2547
    /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2548
       If 'cal' is a valid URI allready then we are done here, as continuing would make
2549
       it invalid. */
2550
    if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2551
  xmlFreeURI(uri);
2552
  return cal;
2553
    }
2554
    /* 'cal' can contain a relative path with backslashes. If that is processed
2555
       by xmlSaveURI, they will be escaped and the external entity loader machinery
2556
       will fail. So convert them to slashes. Misuse 'ret' for walking. */
2557
    ret = cal;
2558
    while (*ret != '\0') {
2559
  if (*ret == '\\')
2560
      *ret = '/';
2561
  ret++;
2562
    }
2563
#endif
2564
0
    memset(&temp, 0, sizeof(temp));
2565
0
    temp.path = (char *) cal;
2566
0
    ret = xmlSaveUri(&temp);
2567
0
    xmlFree(cal);
2568
0
    return(ret);
2569
0
}
2570
#define bottom_uri
2571
#include "elfgcchack.h"