Coverage Report

Created: 2026-04-29 07:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxml2-2.9.7/uri.c
Line
Count
Source
1
/**
2
 * uri.c: set of generic URI related routines
3
 *
4
 * Reference: RFCs 3986, 2732 and 2373
5
 *
6
 * See Copyright for the status of this software.
7
 *
8
 * daniel@veillard.com
9
 */
10
11
#define IN_LIBXML
12
#include "libxml.h"
13
14
#include <string.h>
15
16
#include <libxml/xmlmemory.h>
17
#include <libxml/uri.h>
18
#include <libxml/globals.h>
19
#include <libxml/xmlerror.h>
20
21
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * operand whatever operators surround it (division, modulo, ...).
 */
#define MAX_URI_LENGTH (1024 * 1024)
34
35
static void
36
xmlURIErrMemory(const char *extra)
37
0
{
38
0
    if (extra)
39
0
        __xmlRaiseError(NULL, NULL, NULL,
40
0
                        NULL, NULL, XML_FROM_URI,
41
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
42
0
                        extra, NULL, NULL, 0, 0,
43
0
                        "Memory allocation failed : %s\n", extra);
44
0
    else
45
0
        __xmlRaiseError(NULL, NULL, NULL,
46
0
                        NULL, NULL, XML_FROM_URI,
47
0
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
48
0
                        NULL, NULL, NULL, 0, 0,
49
0
                        "Memory allocation failed\n");
50
0
}
51
52
static void xmlCleanURI(xmlURIPtr uri);
53
54
/*
 * Character classes from the old RFC 2396 rules, used by the legacy
 * handling code.  Except for IS_UNWISE, which takes a pointer, every
 * macro takes a character value; all of them may evaluate their
 * argument more than once.
 */

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" ... "z"
 */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/*
 * upalpha = "A" ... "Z"
 */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/*
 * digit = "0" ... "9"
 *
 * Any IS_DIGIT that is already defined is replaced by the local one.
 */
#undef IS_DIGIT
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||                    \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike its siblings this macro takes a pointer to the character.
 */
#define IS_UNWISE(p)                                                    \
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||                 \
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||                \
     (*(p) == ']') || (*(p) == '`'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') ||                    \
     ((x) == ':') || ((x) == '@') || ((x) == '&') ||                    \
     ((x) == '=') || ((x) == '+') || ((x) == '$') ||                    \
     ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
121
122
/*
 * Skip to the next character: the pointer lvalue p moves forward by
 * three bytes when it sits on '%' (start of an escaped sequence) and by
 * one byte otherwise.  The macro itself does not look at what follows
 * the '%'.  The argument is evaluated more than once; it is fully
 * parenthesized so that expressions such as *pp can be passed safely.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
127
128
/*
129
 * Productions from the spec.
130
 *
131
 *    authority     = server | reg_name
132
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
133
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
134
 *
135
 * path          = [ abs_path | opaque_part ]
136
 */
137
138
445k
/*
 * Duplicate the first n bytes of s through xmlStrndup() and hand the
 * copy back as a char pointer.  The whole expansion is parenthesized so
 * the cast cannot bind to a neighbouring operator.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *) (s), (n)))
139
140
/************************************************************************
141
 *                  *
142
 *                         RFC 3986 parser        *
143
 *                  *
144
 ************************************************************************/
145
146
13.4M
/*
 * Character classes of RFC 3986.  Every macro takes a pointer to the
 * character to test and may evaluate that pointer more than once.
 */

/*
 *    DIGIT         = "0" ... "9"
 */
#define ISA_DIGIT(p) (('0' <= *(p)) && (*(p) <= '9'))

/*
 *    ALPHA         = "a" ... "z" / "A" ... "Z"
 */
#define ISA_ALPHA(p)                                                    \
      ((('a' <= *(p)) && (*(p) <= 'z')) ||                              \
       (('A' <= *(p)) && (*(p) <= 'Z')))

/*
 *    HEXDIG        = DIGIT / "a" ... "f" / "A" ... "F"
 */
#define ISA_HEXDIG(p)                                                   \
      (ISA_DIGIT(p) ||                                                  \
       (('a' <= *(p)) && (*(p) <= 'f')) ||                              \
       (('A' <= *(p)) && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
      ((*(p) == '!') || (*(p) == '$') || (*(p) == '&') ||               \
       (*(p) == '(') || (*(p) == ')') || (*(p) == '*') ||               \
       (*(p) == '+') || (*(p) == ',') || (*(p) == ';') ||               \
       (*(p) == '=') || (*(p) == '\''))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
      ((*(p) == ':') || (*(p) == '/') || (*(p) == '?') ||               \
       (*(p) == '#') || (*(p) == '[') || (*(p) == ']') ||               \
       (*(p) == '@'))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || ISA_SUB_DELIM(p))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
      (ISA_ALPHA(p) || ISA_DIGIT(p) || (*(p) == '-') ||                 \
       (*(p) == '.') || (*(p) == '_') || (*(p) == '~'))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following bytes are only looked at once the '%' matched.
 */
#define ISA_PCT_ENCODED(p)                                              \
      ((*(p) == '%') && ISA_HEXDIG((p) + 1) && ISA_HEXDIG((p) + 2))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
      (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||   \
       (*(p) == ':') || (*(p) == '@'))
195
196
/**
197
 * xmlParse3986Scheme:
198
 * @uri:  pointer to an URI structure
199
 * @str:  pointer to the string to analyze
200
 *
201
 * Parse an URI scheme
202
 *
203
 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
204
 *
205
 * Returns 0 or the error code
206
 */
207
static int
208
917k
xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
209
917k
    const char *cur;
210
211
917k
    if (str == NULL)
212
0
  return(-1);
213
214
917k
    cur = *str;
215
917k
    if (!ISA_ALPHA(cur))
216
536k
  return(2);
217
381k
    cur++;
218
1.52M
    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
219
1.14M
           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
220
381k
    if (uri != NULL) {
221
381k
  if (uri->scheme != NULL) xmlFree(uri->scheme);
222
381k
  uri->scheme = STRNDUP(*str, cur - *str);
223
381k
    }
224
381k
    *str = cur;
225
381k
    return(0);
226
917k
}
227
228
/**
229
 * xmlParse3986Fragment:
230
 * @uri:  pointer to an URI structure
231
 * @str:  pointer to the string to analyze
232
 *
233
 * Parse the query part of an URI
234
 *
235
 * fragment      = *( pchar / "/" / "?" )
236
 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237
 *       in the fragment identifier but this is used very broadly for
238
 *       xpointer scheme selection, so we are allowing it here to not break
239
 *       for example all the DocBook processing chains.
240
 *
241
 * Returns 0 or the error code
242
 */
243
static int
244
xmlParse3986Fragment(xmlURIPtr uri, const char **str)
245
50.1k
{
246
50.1k
    const char *cur;
247
248
50.1k
    if (str == NULL)
249
0
        return (-1);
250
251
50.1k
    cur = *str;
252
253
2.77M
    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
254
63.1k
           (*cur == '[') || (*cur == ']') ||
255
50.1k
           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
256
2.72M
        NEXT(cur);
257
50.1k
    if (uri != NULL) {
258
50.1k
        if (uri->fragment != NULL)
259
0
            xmlFree(uri->fragment);
260
50.1k
  if (uri->cleanup & 2)
261
0
      uri->fragment = STRNDUP(*str, cur - *str);
262
50.1k
  else
263
50.1k
      uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
264
50.1k
    }
265
50.1k
    *str = cur;
266
50.1k
    return (0);
267
50.1k
}
268
269
/**
270
 * xmlParse3986Query:
271
 * @uri:  pointer to an URI structure
272
 * @str:  pointer to the string to analyze
273
 *
274
 * Parse the query part of an URI
275
 *
276
 * query = *uric
277
 *
278
 * Returns 0 or the error code
279
 */
280
static int
281
xmlParse3986Query(xmlURIPtr uri, const char **str)
282
63.3k
{
283
63.3k
    const char *cur;
284
285
63.3k
    if (str == NULL)
286
0
        return (-1);
287
288
63.3k
    cur = *str;
289
290
997k
    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
291
63.3k
           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
292
933k
        NEXT(cur);
293
63.3k
    if (uri != NULL) {
294
63.3k
        if (uri->query != NULL)
295
0
            xmlFree(uri->query);
296
63.3k
  if (uri->cleanup & 2)
297
0
      uri->query = STRNDUP(*str, cur - *str);
298
63.3k
  else
299
63.3k
      uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
300
301
  /* Save the raw bytes of the query as well.
302
   * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
303
   */
304
63.3k
  if (uri->query_raw != NULL)
305
0
      xmlFree (uri->query_raw);
306
63.3k
  uri->query_raw = STRNDUP (*str, cur - *str);
307
63.3k
    }
308
63.3k
    *str = cur;
309
63.3k
    return (0);
310
63.3k
}
311
312
/**
313
 * xmlParse3986Port:
314
 * @uri:  pointer to an URI structure
315
 * @str:  the string to analyze
316
 *
317
 * Parse a port part and fills in the appropriate fields
318
 * of the @uri structure
319
 *
320
 * port          = *DIGIT
321
 *
322
 * Returns 0 or the error code
323
 */
324
static int
325
xmlParse3986Port(xmlURIPtr uri, const char **str)
326
32.2k
{
327
32.2k
    const char *cur = *str;
328
32.2k
    unsigned port = 0; /* unsigned for defined overflow behavior */
329
330
32.2k
    if (ISA_DIGIT(cur)) {
331
45.9k
  while (ISA_DIGIT(cur)) {
332
36.9k
      port = port * 10 + (*cur - '0');
333
334
36.9k
      cur++;
335
36.9k
  }
336
8.99k
  if (uri != NULL)
337
8.99k
      uri->port = port & INT_MAX; /* port value modulo INT_MAX+1 */
338
8.99k
  *str = cur;
339
8.99k
  return(0);
340
8.99k
    }
341
23.2k
    return(1);
342
32.2k
}
343
344
/**
345
 * xmlParse3986Userinfo:
346
 * @uri:  pointer to an URI structure
347
 * @str:  the string to analyze
348
 *
349
 * Parse an user informations part and fills in the appropriate fields
350
 * of the @uri structure
351
 *
352
 * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
353
 *
354
 * Returns 0 or the error code
355
 */
356
static int
357
xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
358
233k
{
359
233k
    const char *cur;
360
361
233k
    cur = *str;
362
3.46M
    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
363
476k
           ISA_SUB_DELIM(cur) || (*cur == ':'))
364
3.22M
  NEXT(cur);
365
233k
    if (*cur == '@') {
366
14.8k
  if (uri != NULL) {
367
14.8k
      if (uri->user != NULL) xmlFree(uri->user);
368
14.8k
      if (uri->cleanup & 2)
369
0
    uri->user = STRNDUP(*str, cur - *str);
370
14.8k
      else
371
14.8k
    uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
372
14.8k
  }
373
14.8k
  *str = cur;
374
14.8k
  return(0);
375
14.8k
    }
376
218k
    return(1);
377
233k
}
378
379
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze, advanced past the octet on success
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three leading digits are examined: a run of
 * one digit is always accepted, a run of two must not start with '0',
 * and the first three digits of a longer run must spell 100 to 255.
 * The upper bound of the 250-255 case is checked on the third digit, so
 * 256 to 259 are rejected.
 *
 * Returns 0 if found and skipped, 1 otherwise (@str is then unchanged)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int ndigits = 0;

    /*
     * Count the leading digits, looking at no more than three of them.
     * A byte is only read once all the previous ones were digits, so the
     * scan never runs past the terminating 0.
     */
    while ((ndigits < 3) && (cur[ndigits] >= '0') && (cur[ndigits] <= '9'))
        ndigits++;

    switch (ndigits) {
        case 1:                         /* 0-9 */
            break;
        case 2:                         /* 10-99, no leading zero */
            if (cur[0] == '0')
                return(1);
            break;
        case 3:
            if (cur[0] == '1')          /* 100-199 */
                break;
            if ((cur[0] == '2') && (cur[1] <= '4'))     /* 200-249 */
                break;
            if ((cur[0] == '2') && (cur[1] == '5') &&
                (cur[2] <= '5'))        /* 250-255 */
                break;
            return(1);
        default:                        /* no digit at all */
            return(1);
    }
    *str = cur + ndigits;
    return(0);
}
416
/**
417
 * xmlParse3986Host:
418
 * @uri:  pointer to an URI structure
419
 * @str:  the string to analyze
420
 *
421
 * Parse an host part and fills in the appropriate fields
422
 * of the @uri structure
423
 *
424
 * host          = IP-literal / IPv4address / reg-name
425
 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
426
 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
427
 * reg-name      = *( unreserved / pct-encoded / sub-delims )
428
 *
429
 * Returns 0 or the error code
430
 */
431
static int
432
xmlParse3986Host(xmlURIPtr uri, const char **str)
433
233k
{
434
233k
    const char *cur = *str;
435
233k
    const char *host;
436
437
233k
    host = cur;
438
    /*
439
     * IPv6 and future adressing scheme are enclosed between brackets
440
     */
441
233k
    if (*cur == '[') {
442
4.10k
        cur++;
443
513k
  while ((*cur != ']') && (*cur != 0))
444
509k
      cur++;
445
4.10k
  if (*cur != ']')
446
2.46k
      return(1);
447
1.64k
  cur++;
448
1.64k
  goto found;
449
4.10k
    }
450
    /*
451
     * try to parse an IPv4
452
     */
453
228k
    if (ISA_DIGIT(cur)) {
454
49.6k
        if (xmlParse3986DecOctet(&cur) != 0)
455
4.58k
      goto not_ipv4;
456
45.0k
  if (*cur != '.')
457
20.1k
      goto not_ipv4;
458
24.9k
  cur++;
459
24.9k
        if (xmlParse3986DecOctet(&cur) != 0)
460
8.32k
      goto not_ipv4;
461
16.6k
  if (*cur != '.')
462
15.3k
      goto not_ipv4;
463
1.30k
        if (xmlParse3986DecOctet(&cur) != 0)
464
1.30k
      goto not_ipv4;
465
0
  if (*cur != '.')
466
0
      goto not_ipv4;
467
0
        if (xmlParse3986DecOctet(&cur) != 0)
468
0
      goto not_ipv4;
469
0
  goto found;
470
49.6k
not_ipv4:
471
49.6k
        cur = *str;
472
49.6k
    }
473
    /*
474
     * then this should be a hostname which can be empty
475
     */
476
3.24M
    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
477
3.01M
        NEXT(cur);
478
230k
found:
479
230k
    if (uri != NULL) {
480
230k
  if (uri->authority != NULL) xmlFree(uri->authority);
481
230k
  uri->authority = NULL;
482
230k
  if (uri->server != NULL) xmlFree(uri->server);
483
230k
  if (cur != host) {
484
211k
      if (uri->cleanup & 2)
485
0
    uri->server = STRNDUP(host, cur - host);
486
211k
      else
487
211k
    uri->server = xmlURIUnescapeString(host, cur - host, NULL);
488
211k
  } else
489
18.9k
      uri->server = NULL;
490
230k
    }
491
230k
    *str = cur;
492
230k
    return(0);
493
228k
}
494
495
/**
496
 * xmlParse3986Authority:
497
 * @uri:  pointer to an URI structure
498
 * @str:  the string to analyze
499
 *
500
 * Parse an authority part and fills in the appropriate fields
501
 * of the @uri structure
502
 *
503
 * authority     = [ userinfo "@" ] host [ ":" port ]
504
 *
505
 * Returns 0 or the error code
506
 */
507
static int
508
xmlParse3986Authority(xmlURIPtr uri, const char **str)
509
233k
{
510
233k
    const char *cur;
511
233k
    int ret;
512
513
233k
    cur = *str;
514
    /*
515
     * try to parse an userinfo and check for the trailing @
516
     */
517
233k
    ret = xmlParse3986Userinfo(uri, &cur);
518
233k
    if ((ret != 0) || (*cur != '@'))
519
218k
        cur = *str;
520
14.8k
    else
521
14.8k
        cur++;
522
233k
    ret = xmlParse3986Host(uri, &cur);
523
233k
    if (ret != 0) return(ret);
524
230k
    if (*cur == ':') {
525
32.2k
        cur++;
526
32.2k
        ret = xmlParse3986Port(uri, &cur);
527
32.2k
  if (ret != 0) return(ret);
528
32.2k
    }
529
207k
    *str = cur;
530
207k
    return(0);
531
230k
}
532
533
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze, advanced past the segment on success
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip a path segment. Nothing is stored by this function.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * The scan stops at the first character that is not a pchar or that
 * equals @forbid.
 *
 * Returns 0 on success, 1 if the string does not start with a pchar
 *         while @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *end = *str;

    if (!ISA_PCHAR(end))
        return(empty ? 0 : 1);

    while (ISA_PCHAR(end) && (*end != forbid))
        NEXT(end);

    *str = end;
    return (0);
}
565
566
/**
567
 * xmlParse3986PathAbEmpty:
568
 * @uri:  pointer to an URI structure
569
 * @str:  the string to analyze
570
 *
571
 * Parse an path absolute or empty and fills in the appropriate fields
572
 * of the @uri structure
573
 *
574
 * path-abempty  = *( "/" segment )
575
 *
576
 * Returns 0 or the error code
577
 */
578
static int
579
xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
580
207k
{
581
207k
    const char *cur;
582
207k
    int ret;
583
584
207k
    cur = *str;
585
586
636k
    while (*cur == '/') {
587
429k
        cur++;
588
429k
  ret = xmlParse3986Segment(&cur, 0, 1);
589
429k
  if (ret != 0) return(ret);
590
429k
    }
591
207k
    if (uri != NULL) {
592
207k
  if (uri->path != NULL) xmlFree(uri->path);
593
207k
        if (*str != cur) {
594
104k
            if (uri->cleanup & 2)
595
0
                uri->path = STRNDUP(*str, cur - *str);
596
104k
            else
597
104k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
598
104k
        } else {
599
103k
            uri->path = NULL;
600
103k
        }
601
207k
    }
602
207k
    *str = cur;
603
207k
    return (0);
604
207k
}
605
606
/**
607
 * xmlParse3986PathAbsolute:
608
 * @uri:  pointer to an URI structure
609
 * @str:  the string to analyze
610
 *
611
 * Parse an path absolute and fills in the appropriate fields
612
 * of the @uri structure
613
 *
614
 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
615
 *
616
 * Returns 0 or the error code
617
 */
618
static int
619
xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
620
60.0k
{
621
60.0k
    const char *cur;
622
60.0k
    int ret;
623
624
60.0k
    cur = *str;
625
626
60.0k
    if (*cur != '/')
627
0
        return(1);
628
60.0k
    cur++;
629
60.0k
    ret = xmlParse3986Segment(&cur, 0, 0);
630
60.0k
    if (ret == 0) {
631
59.2k
  while (*cur == '/') {
632
25.8k
      cur++;
633
25.8k
      ret = xmlParse3986Segment(&cur, 0, 1);
634
25.8k
      if (ret != 0) return(ret);
635
25.8k
  }
636
33.3k
    }
637
60.0k
    if (uri != NULL) {
638
60.0k
  if (uri->path != NULL) xmlFree(uri->path);
639
60.0k
        if (cur != *str) {
640
60.0k
            if (uri->cleanup & 2)
641
0
                uri->path = STRNDUP(*str, cur - *str);
642
60.0k
            else
643
60.0k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
644
60.0k
        } else {
645
0
            uri->path = NULL;
646
0
        }
647
60.0k
    }
648
60.0k
    *str = cur;
649
60.0k
    return (0);
650
60.0k
}
651
652
/**
653
 * xmlParse3986PathRootless:
654
 * @uri:  pointer to an URI structure
655
 * @str:  the string to analyze
656
 *
657
 * Parse an path without root and fills in the appropriate fields
658
 * of the @uri structure
659
 *
660
 * path-rootless = segment-nz *( "/" segment )
661
 *
662
 * Returns 0 or the error code
663
 */
664
static int
665
xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
666
95.3k
{
667
95.3k
    const char *cur;
668
95.3k
    int ret;
669
670
95.3k
    cur = *str;
671
672
95.3k
    ret = xmlParse3986Segment(&cur, 0, 0);
673
95.3k
    if (ret != 0) return(ret);
674
142k
    while (*cur == '/') {
675
46.7k
        cur++;
676
46.7k
  ret = xmlParse3986Segment(&cur, 0, 1);
677
46.7k
  if (ret != 0) return(ret);
678
46.7k
    }
679
95.3k
    if (uri != NULL) {
680
95.3k
  if (uri->path != NULL) xmlFree(uri->path);
681
95.3k
        if (cur != *str) {
682
95.3k
            if (uri->cleanup & 2)
683
0
                uri->path = STRNDUP(*str, cur - *str);
684
95.3k
            else
685
95.3k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
686
95.3k
        } else {
687
0
            uri->path = NULL;
688
0
        }
689
95.3k
    }
690
95.3k
    *str = cur;
691
95.3k
    return (0);
692
95.3k
}
693
694
/**
695
 * xmlParse3986PathNoScheme:
696
 * @uri:  pointer to an URI structure
697
 * @str:  the string to analyze
698
 *
699
 * Parse an path which is not a scheme and fills in the appropriate fields
700
 * of the @uri structure
701
 *
702
 * path-noscheme = segment-nz-nc *( "/" segment )
703
 *
704
 * Returns 0 or the error code
705
 */
706
static int
707
xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
708
355k
{
709
355k
    const char *cur;
710
355k
    int ret;
711
712
355k
    cur = *str;
713
714
355k
    ret = xmlParse3986Segment(&cur, ':', 0);
715
355k
    if (ret != 0) return(ret);
716
632k
    while (*cur == '/') {
717
277k
        cur++;
718
277k
  ret = xmlParse3986Segment(&cur, 0, 1);
719
277k
  if (ret != 0) return(ret);
720
277k
    }
721
355k
    if (uri != NULL) {
722
355k
  if (uri->path != NULL) xmlFree(uri->path);
723
355k
        if (cur != *str) {
724
349k
            if (uri->cleanup & 2)
725
0
                uri->path = STRNDUP(*str, cur - *str);
726
349k
            else
727
349k
                uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
728
349k
        } else {
729
5.55k
            uri->path = NULL;
730
5.55k
        }
731
355k
    }
732
355k
    *str = cur;
733
355k
    return (0);
734
355k
}
735
736
/**
737
 * xmlParse3986HierPart:
738
 * @uri:  pointer to an URI structure
739
 * @str:  the string to analyze
740
 *
741
 * Parse an hierarchical part and fills in the appropriate fields
742
 * of the @uri structure
743
 *
744
 * hier-part     = "//" authority path-abempty
745
 *                / path-absolute
746
 *                / path-rootless
747
 *                / path-empty
748
 *
749
 * Returns 0 or the error code
750
 */
751
static int
752
xmlParse3986HierPart(xmlURIPtr uri, const char **str)
753
290k
{
754
290k
    const char *cur;
755
290k
    int ret;
756
757
290k
    cur = *str;
758
759
290k
    if ((*cur == '/') && (*(cur + 1) == '/')) {
760
144k
        cur += 2;
761
144k
  ret = xmlParse3986Authority(uri, &cur);
762
144k
  if (ret != 0) return(ret);
763
131k
  if (uri->server == NULL)
764
3.61k
      uri->port = -1;
765
131k
  ret = xmlParse3986PathAbEmpty(uri, &cur);
766
131k
  if (ret != 0) return(ret);
767
131k
  *str = cur;
768
131k
  return(0);
769
145k
    } else if (*cur == '/') {
770
12.2k
        ret = xmlParse3986PathAbsolute(uri, &cur);
771
12.2k
  if (ret != 0) return(ret);
772
133k
    } else if (ISA_PCHAR(cur)) {
773
95.3k
        ret = xmlParse3986PathRootless(uri, &cur);
774
95.3k
  if (ret != 0) return(ret);
775
95.3k
    } else {
776
  /* path-empty is effectively empty */
777
37.6k
  if (uri != NULL) {
778
37.6k
      if (uri->path != NULL) xmlFree(uri->path);
779
37.6k
      uri->path = NULL;
780
37.6k
  }
781
37.6k
    }
782
145k
    *str = cur;
783
145k
    return (0);
784
290k
}
785
786
/**
787
 * xmlParse3986RelativeRef:
788
 * @uri:  pointer to an URI structure
789
 * @str:  the string to analyze
790
 *
791
 * Parse an URI string and fills in the appropriate fields
792
 * of the @uri structure
793
 *
794
 * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
795
 * relative-part = "//" authority path-abempty
796
 *               / path-absolute
797
 *               / path-noscheme
798
 *               / path-empty
799
 *
800
 * Returns 0 or the error code
801
 */
802
static int
803
753k
xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
804
753k
    int ret;
805
806
753k
    if ((*str == '/') && (*(str + 1) == '/')) {
807
88.1k
        str += 2;
808
88.1k
  ret = xmlParse3986Authority(uri, &str);
809
88.1k
  if (ret != 0) return(ret);
810
75.3k
  ret = xmlParse3986PathAbEmpty(uri, &str);
811
75.3k
  if (ret != 0) return(ret);
812
664k
    } else if (*str == '/') {
813
47.7k
  ret = xmlParse3986PathAbsolute(uri, &str);
814
47.7k
  if (ret != 0) return(ret);
815
617k
    } else if (ISA_PCHAR(str)) {
816
355k
        ret = xmlParse3986PathNoScheme(uri, &str);
817
355k
  if (ret != 0) return(ret);
818
355k
    } else {
819
  /* path-empty is effectively empty */
820
262k
  if (uri != NULL) {
821
262k
      if (uri->path != NULL) xmlFree(uri->path);
822
262k
      uri->path = NULL;
823
262k
  }
824
262k
    }
825
826
740k
    if (*str == '?') {
827
51.0k
  str++;
828
51.0k
  ret = xmlParse3986Query(uri, &str);
829
51.0k
  if (ret != 0) return(ret);
830
51.0k
    }
831
740k
    if (*str == '#') {
832
39.9k
  str++;
833
39.9k
  ret = xmlParse3986Fragment(uri, &str);
834
39.9k
  if (ret != 0) return(ret);
835
39.9k
    }
836
740k
    if (*str != 0) {
837
388k
  xmlCleanURI(uri);
838
388k
  return(1);
839
388k
    }
840
351k
    return(0);
841
740k
}
842
843
844
/**
845
 * xmlParse3986URI:
846
 * @uri:  pointer to an URI structure
847
 * @str:  the string to analyze
848
 *
849
 * Parse an URI string and fills in the appropriate fields
850
 * of the @uri structure
851
 *
852
 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
853
 *
854
 * Returns 0 or the error code
855
 */
856
static int
857
917k
xmlParse3986URI(xmlURIPtr uri, const char *str) {
858
917k
    int ret;
859
860
917k
    ret = xmlParse3986Scheme(uri, &str);
861
917k
    if (ret != 0) return(ret);
862
381k
    if (*str != ':') {
863
91.5k
  return(1);
864
91.5k
    }
865
290k
    str++;
866
290k
    ret = xmlParse3986HierPart(uri, &str);
867
290k
    if (ret != 0) return(ret);
868
277k
    if (*str == '?') {
869
12.2k
  str++;
870
12.2k
  ret = xmlParse3986Query(uri, &str);
871
12.2k
  if (ret != 0) return(ret);
872
12.2k
    }
873
277k
    if (*str == '#') {
874
10.1k
  str++;
875
10.1k
  ret = xmlParse3986Fragment(uri, &str);
876
10.1k
  if (ret != 0) return(ret);
877
10.1k
    }
878
277k
    if (*str != 0) {
879
112k
  xmlCleanURI(uri);
880
112k
  return(1);
881
112k
    }
882
164k
    return(0);
883
277k
}
884
885
/**
886
 * xmlParse3986URIReference:
887
 * @uri:  pointer to an URI structure
888
 * @str:  the string to analyze
889
 *
890
 * Parse an URI reference string and fills in the appropriate fields
891
 * of the @uri structure
892
 *
893
 * URI-reference = URI / relative-ref
894
 *
895
 * Returns 0 or the error code
896
 */
897
static int
898
917k
xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
899
917k
    int ret;
900
901
917k
    if (str == NULL)
902
0
  return(-1);
903
917k
    xmlCleanURI(uri);
904
905
    /*
906
     * Try first to parse absolute refs, then fallback to relative if
907
     * it fails.
908
     */
909
917k
    ret = xmlParse3986URI(uri, str);
910
917k
    if (ret != 0) {
911
753k
  xmlCleanURI(uri);
912
753k
        ret = xmlParse3986RelativeRef(uri, str);
913
753k
  if (ret != 0) {
914
401k
      xmlCleanURI(uri);
915
401k
      return(ret);
916
401k
  }
917
753k
    }
918
516k
    return(0);
919
917k
}
920
921
/**
922
 * xmlParseURI:
923
 * @str:  the URI string to analyze
924
 *
925
 * Parse an URI based on RFC 3986
926
 *
927
 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
928
 *
929
 * Returns a newly built xmlURIPtr or NULL in case of error
930
 */
931
xmlURIPtr
932
905k
xmlParseURI(const char *str) {
933
905k
    xmlURIPtr uri;
934
905k
    int ret;
935
936
905k
    if (str == NULL)
937
0
  return(NULL);
938
905k
    uri = xmlCreateURI();
939
905k
    if (uri != NULL) {
940
905k
  ret = xmlParse3986URIReference(uri, str);
941
905k
        if (ret) {
942
399k
      xmlFreeURI(uri);
943
399k
      return(NULL);
944
399k
  }
945
905k
    }
946
505k
    return(uri);
947
905k
}
948
949
/**
950
 * xmlParseURIReference:
951
 * @uri:  pointer to an URI structure
952
 * @str:  the string to analyze
953
 *
954
 * Parse an URI reference string based on RFC 3986 and fills in the
955
 * appropriate fields of the @uri structure
956
 *
957
 * URI-reference = URI / relative-ref
958
 *
959
 * Returns 0 or the error code
960
 */
961
int
962
12.8k
xmlParseURIReference(xmlURIPtr uri, const char *str) {
963
12.8k
    return(xmlParse3986URIReference(uri, str));
964
12.8k
}
965
966
/**
967
 * xmlParseURIRaw:
968
 * @str:  the URI string to analyze
969
 * @raw:  if 1 unescaping of URI pieces are disabled
970
 *
971
 * Parse an URI but allows to keep intact the original fragments.
972
 *
973
 * URI-reference = URI / relative-ref
974
 *
975
 * Returns a newly built xmlURIPtr or NULL in case of error
976
 */
977
xmlURIPtr
978
0
xmlParseURIRaw(const char *str, int raw) {
979
0
    xmlURIPtr uri;
980
0
    int ret;
981
982
0
    if (str == NULL)
983
0
  return(NULL);
984
0
    uri = xmlCreateURI();
985
0
    if (uri != NULL) {
986
0
        if (raw) {
987
0
      uri->cleanup |= 2;
988
0
  }
989
0
  ret = xmlParseURIReference(uri, str);
990
0
        if (ret) {
991
0
      xmlFreeURI(uri);
992
0
      return(NULL);
993
0
  }
994
0
    }
995
0
    return(uri);
996
0
}
997
998
/************************************************************************
999
 *                  *
1000
 *      Generic URI structure functions     *
1001
 *                  *
1002
 ************************************************************************/
1003
1004
/**
1005
 * xmlCreateURI:
1006
 *
1007
 * Simply creates an empty xmlURI
1008
 *
1009
 * Returns the new structure or NULL in case of error
1010
 */
1011
xmlURIPtr
1012
922k
xmlCreateURI(void) {
1013
922k
    xmlURIPtr ret;
1014
1015
922k
    ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1016
922k
    if (ret == NULL) {
1017
0
        xmlURIErrMemory("creating URI structure\n");
1018
0
  return(NULL);
1019
0
    }
1020
922k
    memset(ret, 0, sizeof(xmlURI));
1021
922k
    return(ret);
1022
922k
}
1023
1024
/**
1025
 * xmlSaveUriRealloc:
1026
 *
1027
 * Function to handle properly a reallocation when saving an URI
1028
 * Also imposes some limit on the length of an URI string output
1029
 */
1030
static xmlChar *
1031
4.04k
xmlSaveUriRealloc(xmlChar *ret, int *max) {
1032
4.04k
    xmlChar *temp;
1033
4.04k
    int tmp;
1034
1035
4.04k
    if (*max > MAX_URI_LENGTH) {
1036
0
        xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1037
0
        return(NULL);
1038
0
    }
1039
4.04k
    tmp = *max * 2;
1040
4.04k
    temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1041
4.04k
    if (temp == NULL) {
1042
0
        xmlURIErrMemory("saving URI\n");
1043
0
        return(NULL);
1044
0
    }
1045
4.04k
    *max = tmp;
1046
4.04k
    return(temp);
1047
4.04k
}
1048
1049
/**
1050
 * xmlSaveUri:
1051
 * @uri:  pointer to an xmlURI
1052
 *
1053
 * Save the URI as an escaped string
1054
 *
1055
 * Returns a new string (to be deallocated by caller)
1056
 */
1057
xmlChar *
1058
6.81k
xmlSaveUri(xmlURIPtr uri) {
1059
6.81k
    xmlChar *ret = NULL;
1060
6.81k
    xmlChar *temp;
1061
6.81k
    const char *p;
1062
6.81k
    int len;
1063
6.81k
    int max;
1064
1065
6.81k
    if (uri == NULL) return(NULL);
1066
1067
1068
6.81k
    max = 80;
1069
6.81k
    ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1070
6.81k
    if (ret == NULL) {
1071
0
        xmlURIErrMemory("saving URI\n");
1072
0
  return(NULL);
1073
0
    }
1074
6.81k
    len = 0;
1075
1076
6.81k
    if (uri->scheme != NULL) {
1077
0
  p = uri->scheme;
1078
0
  while (*p != 0) {
1079
0
      if (len >= max) {
1080
0
                temp = xmlSaveUriRealloc(ret, &max);
1081
0
                if (temp == NULL) goto mem_error;
1082
0
    ret = temp;
1083
0
      }
1084
0
      ret[len++] = *p++;
1085
0
  }
1086
0
  if (len >= max) {
1087
0
            temp = xmlSaveUriRealloc(ret, &max);
1088
0
            if (temp == NULL) goto mem_error;
1089
0
            ret = temp;
1090
0
  }
1091
0
  ret[len++] = ':';
1092
0
    }
1093
6.81k
    if (uri->opaque != NULL) {
1094
0
  p = uri->opaque;
1095
0
  while (*p != 0) {
1096
0
      if (len + 3 >= max) {
1097
0
                temp = xmlSaveUriRealloc(ret, &max);
1098
0
                if (temp == NULL) goto mem_error;
1099
0
                ret = temp;
1100
0
      }
1101
0
      if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1102
0
    ret[len++] = *p++;
1103
0
      else {
1104
0
    int val = *(unsigned char *)p++;
1105
0
    int hi = val / 0x10, lo = val % 0x10;
1106
0
    ret[len++] = '%';
1107
0
    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1108
0
    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1109
0
      }
1110
0
  }
1111
6.81k
    } else {
1112
6.81k
  if ((uri->server != NULL) || (uri->port == -1)) {
1113
2.46k
      if (len + 3 >= max) {
1114
0
                temp = xmlSaveUriRealloc(ret, &max);
1115
0
                if (temp == NULL) goto mem_error;
1116
0
                ret = temp;
1117
0
      }
1118
2.46k
      ret[len++] = '/';
1119
2.46k
      ret[len++] = '/';
1120
2.46k
      if (uri->user != NULL) {
1121
1.54k
    p = uri->user;
1122
1.05M
    while (*p != 0) {
1123
1.05M
        if (len + 3 >= max) {
1124
1.60k
                        temp = xmlSaveUriRealloc(ret, &max);
1125
1.60k
                        if (temp == NULL) goto mem_error;
1126
1.60k
                        ret = temp;
1127
1.60k
        }
1128
1.05M
        if ((IS_UNRESERVED(*(p))) ||
1129
687k
      ((*(p) == ';')) || ((*(p) == ':')) ||
1130
666k
      ((*(p) == '&')) || ((*(p) == '=')) ||
1131
601k
      ((*(p) == '+')) || ((*(p) == '$')) ||
1132
593k
      ((*(p) == ',')))
1133
491k
      ret[len++] = *p++;
1134
565k
        else {
1135
565k
      int val = *(unsigned char *)p++;
1136
565k
      int hi = val / 0x10, lo = val % 0x10;
1137
565k
      ret[len++] = '%';
1138
565k
      ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1139
565k
      ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1140
565k
        }
1141
1.05M
    }
1142
1.54k
    if (len + 3 >= max) {
1143
19
                    temp = xmlSaveUriRealloc(ret, &max);
1144
19
                    if (temp == NULL) goto mem_error;
1145
19
                    ret = temp;
1146
19
    }
1147
1.54k
    ret[len++] = '@';
1148
1.54k
      }
1149
2.46k
      if (uri->server != NULL) {
1150
2.46k
    p = uri->server;
1151
341k
    while (*p != 0) {
1152
339k
        if (len >= max) {
1153
359
      temp = xmlSaveUriRealloc(ret, &max);
1154
359
      if (temp == NULL) goto mem_error;
1155
359
      ret = temp;
1156
359
        }
1157
339k
        ret[len++] = *p++;
1158
339k
    }
1159
2.46k
    if (uri->port > 0) {
1160
142
        if (len + 10 >= max) {
1161
10
      temp = xmlSaveUriRealloc(ret, &max);
1162
10
      if (temp == NULL) goto mem_error;
1163
10
      ret = temp;
1164
10
        }
1165
142
        len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1166
142
    }
1167
2.46k
      }
1168
4.35k
  } else if (uri->authority != NULL) {
1169
0
      if (len + 3 >= max) {
1170
0
                temp = xmlSaveUriRealloc(ret, &max);
1171
0
                if (temp == NULL) goto mem_error;
1172
0
                ret = temp;
1173
0
      }
1174
0
      ret[len++] = '/';
1175
0
      ret[len++] = '/';
1176
0
      p = uri->authority;
1177
0
      while (*p != 0) {
1178
0
    if (len + 3 >= max) {
1179
0
                    temp = xmlSaveUriRealloc(ret, &max);
1180
0
                    if (temp == NULL) goto mem_error;
1181
0
                    ret = temp;
1182
0
    }
1183
0
    if ((IS_UNRESERVED(*(p))) ||
1184
0
                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1185
0
                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1186
0
                    ((*(p) == '=')) || ((*(p) == '+')))
1187
0
        ret[len++] = *p++;
1188
0
    else {
1189
0
        int val = *(unsigned char *)p++;
1190
0
        int hi = val / 0x10, lo = val % 0x10;
1191
0
        ret[len++] = '%';
1192
0
        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1193
0
        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1194
0
    }
1195
0
      }
1196
4.35k
  } else if (uri->scheme != NULL) {
1197
0
      if (len + 3 >= max) {
1198
0
                temp = xmlSaveUriRealloc(ret, &max);
1199
0
                if (temp == NULL) goto mem_error;
1200
0
                ret = temp;
1201
0
      }
1202
0
  }
1203
6.81k
  if (uri->path != NULL) {
1204
3.89k
      p = uri->path;
1205
      /*
1206
       * the colon in file:///d: should not be escaped or
1207
       * Windows accesses fail later.
1208
       */
1209
3.89k
      if ((uri->scheme != NULL) &&
1210
0
    (p[0] == '/') &&
1211
0
    (((p[1] >= 'a') && (p[1] <= 'z')) ||
1212
0
     ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1213
0
    (p[2] == ':') &&
1214
0
          (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1215
0
    if (len + 3 >= max) {
1216
0
                    temp = xmlSaveUriRealloc(ret, &max);
1217
0
                    if (temp == NULL) goto mem_error;
1218
0
                    ret = temp;
1219
0
    }
1220
0
    ret[len++] = *p++;
1221
0
    ret[len++] = *p++;
1222
0
    ret[len++] = *p++;
1223
0
      }
1224
426k
      while (*p != 0) {
1225
422k
    if (len + 3 >= max) {
1226
779
                    temp = xmlSaveUriRealloc(ret, &max);
1227
779
                    if (temp == NULL) goto mem_error;
1228
779
                    ret = temp;
1229
779
    }
1230
422k
    if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1231
53.0k
                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1232
30.6k
              ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1233
14.1k
              ((*(p) == ',')))
1234
413k
        ret[len++] = *p++;
1235
9.06k
    else {
1236
9.06k
        int val = *(unsigned char *)p++;
1237
9.06k
        int hi = val / 0x10, lo = val % 0x10;
1238
9.06k
        ret[len++] = '%';
1239
9.06k
        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1240
9.06k
        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1241
9.06k
    }
1242
422k
      }
1243
3.89k
  }
1244
6.81k
  if (uri->query_raw != NULL) {
1245
993
      if (len + 1 >= max) {
1246
34
                temp = xmlSaveUriRealloc(ret, &max);
1247
34
                if (temp == NULL) goto mem_error;
1248
34
                ret = temp;
1249
34
      }
1250
993
      ret[len++] = '?';
1251
993
      p = uri->query_raw;
1252
170k
      while (*p != 0) {
1253
169k
    if (len + 1 >= max) {
1254
342
                    temp = xmlSaveUriRealloc(ret, &max);
1255
342
                    if (temp == NULL) goto mem_error;
1256
342
                    ret = temp;
1257
342
    }
1258
169k
    ret[len++] = *p++;
1259
169k
      }
1260
5.82k
  } else if (uri->query != NULL) {
1261
0
      if (len + 3 >= max) {
1262
0
                temp = xmlSaveUriRealloc(ret, &max);
1263
0
                if (temp == NULL) goto mem_error;
1264
0
                ret = temp;
1265
0
      }
1266
0
      ret[len++] = '?';
1267
0
      p = uri->query;
1268
0
      while (*p != 0) {
1269
0
    if (len + 3 >= max) {
1270
0
                    temp = xmlSaveUriRealloc(ret, &max);
1271
0
                    if (temp == NULL) goto mem_error;
1272
0
                    ret = temp;
1273
0
    }
1274
0
    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1275
0
        ret[len++] = *p++;
1276
0
    else {
1277
0
        int val = *(unsigned char *)p++;
1278
0
        int hi = val / 0x10, lo = val % 0x10;
1279
0
        ret[len++] = '%';
1280
0
        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1281
0
        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1282
0
    }
1283
0
      }
1284
0
  }
1285
6.81k
    }
1286
6.81k
    if (uri->fragment != NULL) {
1287
2.15k
  if (len + 3 >= max) {
1288
34
            temp = xmlSaveUriRealloc(ret, &max);
1289
34
            if (temp == NULL) goto mem_error;
1290
34
            ret = temp;
1291
34
  }
1292
2.15k
  ret[len++] = '#';
1293
2.15k
  p = uri->fragment;
1294
802k
  while (*p != 0) {
1295
800k
      if (len + 3 >= max) {
1296
844
                temp = xmlSaveUriRealloc(ret, &max);
1297
844
                if (temp == NULL) goto mem_error;
1298
844
                ret = temp;
1299
844
      }
1300
800k
      if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1301
795k
    ret[len++] = *p++;
1302
4.66k
      else {
1303
4.66k
    int val = *(unsigned char *)p++;
1304
4.66k
    int hi = val / 0x10, lo = val % 0x10;
1305
4.66k
    ret[len++] = '%';
1306
4.66k
    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1307
4.66k
    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1308
4.66k
      }
1309
800k
  }
1310
2.15k
    }
1311
6.81k
    if (len >= max) {
1312
18
        temp = xmlSaveUriRealloc(ret, &max);
1313
18
        if (temp == NULL) goto mem_error;
1314
18
        ret = temp;
1315
18
    }
1316
6.81k
    ret[len] = 0;
1317
6.81k
    return(ret);
1318
1319
0
mem_error:
1320
0
    xmlFree(ret);
1321
0
    return(NULL);
1322
6.81k
}
1323
1324
/**
1325
 * xmlPrintURI:
1326
 * @stream:  a FILE* for the output
1327
 * @uri:  pointer to an xmlURI
1328
 *
1329
 * Prints the URI in the stream @stream.
1330
 */
1331
void
1332
0
xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1333
0
    xmlChar *out;
1334
1335
0
    out = xmlSaveUri(uri);
1336
0
    if (out != NULL) {
1337
0
  fprintf(stream, "%s", (char *) out);
1338
0
  xmlFree(out);
1339
0
    }
1340
0
}
1341
1342
/**
1343
 * xmlCleanURI:
1344
 * @uri:  pointer to an xmlURI
1345
 *
1346
 * Make sure the xmlURI struct is free of content
1347
 */
1348
static void
1349
2.57M
xmlCleanURI(xmlURIPtr uri) {
1350
2.57M
    if (uri == NULL) return;
1351
1352
2.57M
    if (uri->scheme != NULL) xmlFree(uri->scheme);
1353
2.57M
    uri->scheme = NULL;
1354
2.57M
    if (uri->server != NULL) xmlFree(uri->server);
1355
2.57M
    uri->server = NULL;
1356
2.57M
    if (uri->user != NULL) xmlFree(uri->user);
1357
2.57M
    uri->user = NULL;
1358
2.57M
    if (uri->path != NULL) xmlFree(uri->path);
1359
2.57M
    uri->path = NULL;
1360
2.57M
    if (uri->fragment != NULL) xmlFree(uri->fragment);
1361
2.57M
    uri->fragment = NULL;
1362
2.57M
    if (uri->opaque != NULL) xmlFree(uri->opaque);
1363
2.57M
    uri->opaque = NULL;
1364
2.57M
    if (uri->authority != NULL) xmlFree(uri->authority);
1365
2.57M
    uri->authority = NULL;
1366
2.57M
    if (uri->query != NULL) xmlFree(uri->query);
1367
2.57M
    uri->query = NULL;
1368
2.57M
    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1369
2.57M
    uri->query_raw = NULL;
1370
2.57M
}
1371
1372
/**
1373
 * xmlFreeURI:
1374
 * @uri:  pointer to an xmlURI
1375
 *
1376
 * Free up the xmlURI struct
1377
 */
1378
void
1379
922k
xmlFreeURI(xmlURIPtr uri) {
1380
922k
    if (uri == NULL) return;
1381
1382
922k
    if (uri->scheme != NULL) xmlFree(uri->scheme);
1383
922k
    if (uri->server != NULL) xmlFree(uri->server);
1384
922k
    if (uri->user != NULL) xmlFree(uri->user);
1385
922k
    if (uri->path != NULL) xmlFree(uri->path);
1386
922k
    if (uri->fragment != NULL) xmlFree(uri->fragment);
1387
922k
    if (uri->opaque != NULL) xmlFree(uri->opaque);
1388
922k
    if (uri->authority != NULL) xmlFree(uri->authority);
1389
922k
    if (uri->query != NULL) xmlFree(uri->query);
1390
922k
    if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1391
922k
    xmlFree(uri);
1392
922k
}
1393
1394
/************************************************************************
1395
 *                  *
1396
 *      Helper functions        *
1397
 *                  *
1398
 ************************************************************************/
1399
1400
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of "/" between segments are
 * collapsed to a single "/" along the way.
 *
 * Normalization occurs directly on the string, no new allocation is done;
 * the result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" reads the original text, "out" writes the compacted text.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : keep only the last "/" of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand rather than
         * with strcpy. */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Step back over the "/" run that precedes the removed text.  */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        /*
         * Reaching the start of the buffer on a "/" means only slashes
         * precede "cur": there is no previous segment, keep going from
         * here.  Reaching it on any other character means the previous
         * segment is a single character at offset 0 (as in "a/b/../..");
         * that segment must still be re-examined below.
         */
        if ((segp == path) && (path[0] == '/'))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1588
1589
462k
/* Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0. */
static int is_hex(char c) {
    int isDigit = (c >= '0') && (c <= '9');
    int isLower = (c >= 'a') && (c <= 'f');
    int isUpper = (c >= 'A') && (c <= 'F');

    return((isDigit || isLower || isUpper) ? 1 : 0);
}
1596
1597
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. Each
 * "%XX" sequence with two hexadecimal digits is replaced by the single
 * byte it encodes (no character encoding is applied); everything else,
 * including a "%" not followed by two hex digits, is copied unchanged.
 * The result is never longer than the input, so a @target of at least
 * @len + 1 bytes is always large enough.
 *
 * Returns @target if one was supplied, otherwise a newly allocated copy of
 * the string, unescaped and 0 terminated; NULL only in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len < 0) return(NULL);

    ret = target;
    if (ret == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    out = ret;
    for (in = str; len > 0; out++) {
        /* An escape needs all three of its bytes inside the remaining
         * length. */
        if ((len > 2) && (in[0] == '%') && (is_hex(in[1])) &&
            (is_hex(in[2]))) {
            int val = 0;
            int i;

            for (i = 1; i <= 2; i++) {
                char c = in[i];

                val *= 16;
                if ((c >= '0') && (c <= '9'))
                    val += (c - '0');
                else if ((c >= 'a') && (c <= 'f'))
                    val += (c - 'a') + 10;
                else if ((c >= 'A') && (c <= 'F'))
                    val += (c - 'A') + 10;
            }
            *out = (char) val;
            in += 3;
            len -= 3;
        } else {
            *out = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1658
1659
/**
1660
 * xmlURIEscapeStr:
1661
 * @str:  string to escape
1662
 * @list: exception list string of chars not to escape
1663
 *
1664
 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1665
 * and the characters in the exception list.
1666
 *
1667
 * Returns a new escaped string or NULL in case of error.
1668
 */
1669
xmlChar *
1670
0
xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1671
0
    xmlChar *ret, ch;
1672
0
    xmlChar *temp;
1673
0
    const xmlChar *in;
1674
0
    int len, out;
1675
1676
0
    if (str == NULL)
1677
0
  return(NULL);
1678
0
    if (str[0] == 0)
1679
0
  return(xmlStrdup(str));
1680
0
    len = xmlStrlen(str);
1681
0
    if (!(len > 0)) return(NULL);
1682
1683
0
    len += 20;
1684
0
    ret = (xmlChar *) xmlMallocAtomic(len);
1685
0
    if (ret == NULL) {
1686
0
        xmlURIErrMemory("escaping URI value\n");
1687
0
  return(NULL);
1688
0
    }
1689
0
    in = (const xmlChar *) str;
1690
0
    out = 0;
1691
0
    while(*in != 0) {
1692
0
  if (len - out <= 3) {
1693
0
            temp = xmlSaveUriRealloc(ret, &len);
1694
0
      if (temp == NULL) {
1695
0
                xmlURIErrMemory("escaping URI value\n");
1696
0
    xmlFree(ret);
1697
0
    return(NULL);
1698
0
      }
1699
0
      ret = temp;
1700
0
  }
1701
1702
0
  ch = *in;
1703
1704
0
  if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1705
0
      unsigned char val;
1706
0
      ret[out++] = '%';
1707
0
      val = ch >> 4;
1708
0
      if (val <= 9)
1709
0
    ret[out++] = '0' + val;
1710
0
      else
1711
0
    ret[out++] = 'A' + val - 0xA;
1712
0
      val = ch & 0xF;
1713
0
      if (val <= 9)
1714
0
    ret[out++] = '0' + val;
1715
0
      else
1716
0
    ret[out++] = 'A' + val - 0xA;
1717
0
      in++;
1718
0
  } else {
1719
0
      ret[out++] = *in++;
1720
0
  }
1721
1722
0
    }
1723
0
    ret[out] = 0;
1724
0
    return(ret);
1725
0
}
1726
1727
/**
1728
 * xmlURIEscape:
1729
 * @str:  the string of the URI to escape
1730
 *
1731
 * Escaping routine, does not do validity checks !
1732
 * It will try to escape the chars needing this, but this is heuristic
1733
 * based it's impossible to be sure.
1734
 *
1735
 * Returns an copy of the string, but escaped
1736
 *
1737
 * 25 May 2001
1738
 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1739
 * according to RFC2396.
1740
 *   - Carl Douglas
1741
 */
1742
xmlChar *
1743
xmlURIEscape(const xmlChar * str)
1744
0
{
1745
0
    xmlChar *ret, *segment = NULL;
1746
0
    xmlURIPtr uri;
1747
0
    int ret2;
1748
1749
0
#define NULLCHK(p) if(!p) { \
1750
0
         xmlURIErrMemory("escaping URI value\n"); \
1751
0
         xmlFreeURI(uri); \
1752
0
         return NULL; } \
1753
0
1754
0
    if (str == NULL)
1755
0
        return (NULL);
1756
1757
0
    uri = xmlCreateURI();
1758
0
    if (uri != NULL) {
1759
  /*
1760
   * Allow escaping errors in the unescaped form
1761
   */
1762
0
        uri->cleanup = 1;
1763
0
        ret2 = xmlParseURIReference(uri, (const char *)str);
1764
0
        if (ret2) {
1765
0
            xmlFreeURI(uri);
1766
0
            return (NULL);
1767
0
        }
1768
0
    }
1769
1770
0
    if (!uri)
1771
0
        return NULL;
1772
1773
0
    ret = NULL;
1774
1775
0
    if (uri->scheme) {
1776
0
        segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1777
0
        NULLCHK(segment)
1778
0
        ret = xmlStrcat(ret, segment);
1779
0
        ret = xmlStrcat(ret, BAD_CAST ":");
1780
0
        xmlFree(segment);
1781
0
    }
1782
1783
0
    if (uri->authority) {
1784
0
        segment =
1785
0
            xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1786
0
        NULLCHK(segment)
1787
0
        ret = xmlStrcat(ret, BAD_CAST "//");
1788
0
        ret = xmlStrcat(ret, segment);
1789
0
        xmlFree(segment);
1790
0
    }
1791
1792
0
    if (uri->user) {
1793
0
        segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1794
0
        NULLCHK(segment)
1795
0
    ret = xmlStrcat(ret,BAD_CAST "//");
1796
0
        ret = xmlStrcat(ret, segment);
1797
0
        ret = xmlStrcat(ret, BAD_CAST "@");
1798
0
        xmlFree(segment);
1799
0
    }
1800
1801
0
    if (uri->server) {
1802
0
        segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1803
0
        NULLCHK(segment)
1804
0
    if (uri->user == NULL)
1805
0
    ret = xmlStrcat(ret, BAD_CAST "//");
1806
0
        ret = xmlStrcat(ret, segment);
1807
0
        xmlFree(segment);
1808
0
    }
1809
1810
0
    if (uri->port) {
1811
0
        xmlChar port[10];
1812
1813
0
        snprintf((char *) port, 10, "%d", uri->port);
1814
0
        ret = xmlStrcat(ret, BAD_CAST ":");
1815
0
        ret = xmlStrcat(ret, port);
1816
0
    }
1817
1818
0
    if (uri->path) {
1819
0
        segment =
1820
0
            xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1821
0
        NULLCHK(segment)
1822
0
        ret = xmlStrcat(ret, segment);
1823
0
        xmlFree(segment);
1824
0
    }
1825
1826
0
    if (uri->query_raw) {
1827
0
        ret = xmlStrcat(ret, BAD_CAST "?");
1828
0
        ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1829
0
    }
1830
0
    else if (uri->query) {
1831
0
        segment =
1832
0
            xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1833
0
        NULLCHK(segment)
1834
0
        ret = xmlStrcat(ret, BAD_CAST "?");
1835
0
        ret = xmlStrcat(ret, segment);
1836
0
        xmlFree(segment);
1837
0
    }
1838
1839
0
    if (uri->opaque) {
1840
0
        segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1841
0
        NULLCHK(segment)
1842
0
        ret = xmlStrcat(ret, segment);
1843
0
        xmlFree(segment);
1844
0
    }
1845
1846
0
    if (uri->fragment) {
1847
0
        segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1848
0
        NULLCHK(segment)
1849
0
        ret = xmlStrcat(ret, BAD_CAST "#");
1850
0
        ret = xmlStrcat(ret, segment);
1851
0
        xmlFree(segment);
1852
0
    }
1853
1854
0
    xmlFreeURI(uri);
1855
0
#undef NULLCHK
1856
1857
0
    return (ret);
1858
0
}
1859
1860
/************************************************************************
1861
 *                  *
1862
 *      Public functions        *
1863
 *                  *
1864
 ************************************************************************/
1865
1866
/**
1867
 * xmlBuildURI:
1868
 * @URI:  the URI instance found in the document
1869
 * @base:  the base value
1870
 *
1871
 * Computes he final URI of the reference done by checking that
1872
 * the given URI is valid, and building the final URI using the
1873
 * base URI. This is processed according to section 5.2 of the
1874
 * RFC 2396
1875
 *
1876
 * 5.2. Resolving Relative References to Absolute Form
1877
 *
1878
 * Returns a new URI string (to be freed by the caller) or NULL in case
1879
 *         of error.
1880
 */
1881
xmlChar *
1882
8.71k
xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1883
8.71k
    xmlChar *val = NULL;
1884
8.71k
    int ret, len, indx, cur, out;
1885
8.71k
    xmlURIPtr ref = NULL;
1886
8.71k
    xmlURIPtr bas = NULL;
1887
8.71k
    xmlURIPtr res = NULL;
1888
1889
    /*
1890
     * 1) The URI reference is parsed into the potential four components and
1891
     *    fragment identifier, as described in Section 4.3.
1892
     *
1893
     *    NOTE that a completely empty URI is treated by modern browsers
1894
     *    as a reference to "." rather than as a synonym for the current
1895
     *    URI.  Should we do that here?
1896
     */
1897
8.71k
    if (URI == NULL)
1898
0
  ret = -1;
1899
8.71k
    else {
1900
8.71k
  if (*URI) {
1901
7.84k
      ref = xmlCreateURI();
1902
7.84k
      if (ref == NULL)
1903
0
    goto done;
1904
7.84k
      ret = xmlParseURIReference(ref, (const char *) URI);
1905
7.84k
  }
1906
869
  else
1907
869
      ret = 0;
1908
8.71k
    }
1909
8.71k
    if (ret != 0)
1910
1.57k
  goto done;
1911
7.14k
    if ((ref != NULL) && (ref->scheme != NULL)) {
1912
  /*
1913
   * The URI is absolute don't modify.
1914
   */
1915
43
  val = xmlStrdup(URI);
1916
43
  goto done;
1917
43
    }
1918
7.09k
    if (base == NULL)
1919
2.11k
  ret = -1;
1920
4.98k
    else {
1921
4.98k
  bas = xmlCreateURI();
1922
4.98k
  if (bas == NULL)
1923
0
      goto done;
1924
4.98k
  ret = xmlParseURIReference(bas, (const char *) base);
1925
4.98k
    }
1926
7.09k
    if (ret != 0) {
1927
2.11k
  if (ref)
1928
1.83k
      val = xmlSaveUri(ref);
1929
2.11k
  goto done;
1930
2.11k
    }
1931
4.98k
    if (ref == NULL) {
1932
  /*
1933
   * the base fragment must be ignored
1934
   */
1935
589
  if (bas->fragment != NULL) {
1936
0
      xmlFree(bas->fragment);
1937
0
      bas->fragment = NULL;
1938
0
  }
1939
589
  val = xmlSaveUri(bas);
1940
589
  goto done;
1941
589
    }
1942
1943
    /*
1944
     * 2) If the path component is empty and the scheme, authority, and
1945
     *    query components are undefined, then it is a reference to the
1946
     *    current document and we are done.  Otherwise, the reference URI's
1947
     *    query and fragment components are defined as found (or not found)
1948
     *    within the URI reference and not inherited from the base URI.
1949
     *
1950
     *    NOTE that in modern browsers, the parsing differs from the above
1951
     *    in the following aspect:  the query component is allowed to be
1952
     *    defined while still treating this as a reference to the current
1953
     *    document.
1954
     */
1955
4.39k
    res = xmlCreateURI();
1956
4.39k
    if (res == NULL)
1957
0
  goto done;
1958
4.39k
    if ((ref->scheme == NULL) && (ref->path == NULL) &&
1959
1.61k
  ((ref->authority == NULL) && (ref->server == NULL))) {
1960
330
  if (bas->scheme != NULL)
1961
0
      res->scheme = xmlMemStrdup(bas->scheme);
1962
330
  if (bas->authority != NULL)
1963
0
      res->authority = xmlMemStrdup(bas->authority);
1964
330
  else if ((bas->server != NULL) || (bas->port == -1)) {
1965
0
      if (bas->server != NULL)
1966
0
    res->server = xmlMemStrdup(bas->server);
1967
0
      if (bas->user != NULL)
1968
0
    res->user = xmlMemStrdup(bas->user);
1969
0
      res->port = bas->port;
1970
0
  }
1971
330
  if (bas->path != NULL)
1972
51
      res->path = xmlMemStrdup(bas->path);
1973
330
  if (ref->query_raw != NULL)
1974
84
      res->query_raw = xmlMemStrdup (ref->query_raw);
1975
246
  else if (ref->query != NULL)
1976
0
      res->query = xmlMemStrdup(ref->query);
1977
246
  else if (bas->query_raw != NULL)
1978
0
      res->query_raw = xmlMemStrdup(bas->query_raw);
1979
246
  else if (bas->query != NULL)
1980
0
      res->query = xmlMemStrdup(bas->query);
1981
330
  if (ref->fragment != NULL)
1982
253
      res->fragment = xmlMemStrdup(ref->fragment);
1983
330
  goto step_7;
1984
330
    }
1985
1986
    /*
1987
     * 3) If the scheme component is defined, indicating that the reference
1988
     *    starts with a scheme name, then the reference is interpreted as an
1989
     *    absolute URI and we are done.  Otherwise, the reference URI's
1990
     *    scheme is inherited from the base URI's scheme component.
1991
     */
1992
4.06k
    if (ref->scheme != NULL) {
1993
0
  val = xmlSaveUri(ref);
1994
0
  goto done;
1995
0
    }
1996
4.06k
    if (bas->scheme != NULL)
1997
0
  res->scheme = xmlMemStrdup(bas->scheme);
1998
1999
4.06k
    if (ref->query_raw != NULL)
2000
577
  res->query_raw = xmlMemStrdup(ref->query_raw);
2001
3.49k
    else if (ref->query != NULL)
2002
0
  res->query = xmlMemStrdup(ref->query);
2003
4.06k
    if (ref->fragment != NULL)
2004
1.25k
  res->fragment = xmlMemStrdup(ref->fragment);
2005
2006
    /*
2007
     * 4) If the authority component is defined, then the reference is a
2008
     *    network-path and we skip to step 7.  Otherwise, the reference
2009
     *    URI's authority is inherited from the base URI's authority
2010
     *    component, which will also be undefined if the URI scheme does not
2011
     *    use an authority component.
2012
     */
2013
4.06k
    if ((ref->authority != NULL) || (ref->server != NULL)) {
2014
1.62k
  if (ref->authority != NULL)
2015
0
      res->authority = xmlMemStrdup(ref->authority);
2016
1.62k
  else {
2017
1.62k
      res->server = xmlMemStrdup(ref->server);
2018
1.62k
      if (ref->user != NULL)
2019
968
    res->user = xmlMemStrdup(ref->user);
2020
1.62k
            res->port = ref->port;
2021
1.62k
  }
2022
1.62k
  if (ref->path != NULL)
2023
343
      res->path = xmlMemStrdup(ref->path);
2024
1.62k
  goto step_7;
2025
1.62k
    }
2026
2.44k
    if (bas->authority != NULL)
2027
0
  res->authority = xmlMemStrdup(bas->authority);
2028
2.44k
    else if ((bas->server != NULL) || (bas->port == -1)) {
2029
0
  if (bas->server != NULL)
2030
0
      res->server = xmlMemStrdup(bas->server);
2031
0
  if (bas->user != NULL)
2032
0
      res->user = xmlMemStrdup(bas->user);
2033
0
  res->port = bas->port;
2034
0
    }
2035
2036
    /*
2037
     * 5) If the path component begins with a slash character ("/"), then
2038
     *    the reference is an absolute-path and we skip to step 7.
2039
     */
2040
2.44k
    if ((ref->path != NULL) && (ref->path[0] == '/')) {
2041
375
  res->path = xmlMemStrdup(ref->path);
2042
375
  goto step_7;
2043
375
    }
2044
2045
2046
    /*
2047
     * 6) If this step is reached, then we are resolving a relative-path
2048
     *    reference.  The relative path needs to be merged with the base
2049
     *    URI's path.  Although there are many ways to do this, we will
2050
     *    describe a simple method using a separate string buffer.
2051
     *
2052
     * Allocate a buffer large enough for the result string.
2053
     */
2054
2.06k
    len = 2; /* extra / and 0 */
2055
2.06k
    if (ref->path != NULL)
2056
2.06k
  len += strlen(ref->path);
2057
2.06k
    if (bas->path != NULL)
2058
217
  len += strlen(bas->path);
2059
2.06k
    res->path = (char *) xmlMallocAtomic(len);
2060
2.06k
    if (res->path == NULL) {
2061
0
        xmlURIErrMemory("resolving URI against base\n");
2062
0
  goto done;
2063
0
    }
2064
2.06k
    res->path[0] = 0;
2065
2066
    /*
2067
     * a) All but the last segment of the base URI's path component is
2068
     *    copied to the buffer.  In other words, any characters after the
2069
     *    last (right-most) slash character, if any, are excluded.
2070
     */
2071
2.06k
    cur = 0;
2072
2.06k
    out = 0;
2073
2.06k
    if (bas->path != NULL) {
2074
868
  while (bas->path[cur] != 0) {
2075
8.89k
      while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2076
8.02k
    cur++;
2077
868
      if (bas->path[cur] == 0)
2078
217
    break;
2079
2080
651
      cur++;
2081
3.90k
      while (out < cur) {
2082
3.25k
    res->path[out] = bas->path[out];
2083
3.25k
    out++;
2084
3.25k
      }
2085
651
  }
2086
217
    }
2087
2.06k
    res->path[out] = 0;
2088
2089
    /*
2090
     * b) The reference's path component is appended to the buffer
2091
     *    string.
2092
     */
2093
2.06k
    if (ref->path != NULL && ref->path[0] != 0) {
2094
2.05k
  indx = 0;
2095
  /*
2096
   * Ensure the path includes a '/'
2097
   */
2098
2.05k
  if ((out == 0) && (bas->server != NULL))
2099
0
      res->path[out++] = '/';
2100
168k
  while (ref->path[indx] != 0) {
2101
166k
      res->path[out++] = ref->path[indx++];
2102
166k
  }
2103
2.05k
    }
2104
2.06k
    res->path[out] = 0;
2105
2106
    /*
2107
     * Steps c) to h) are really path normalization steps
2108
     */
2109
2.06k
    xmlNormalizeURIPath(res->path);
2110
2111
4.39k
step_7:
2112
2113
    /*
2114
     * 7) The resulting URI components, including any inherited from the
2115
     *    base URI, are recombined to give the absolute form of the URI
2116
     *    reference.
2117
     */
2118
4.39k
    val = xmlSaveUri(res);
2119
2120
8.71k
done:
2121
8.71k
    if (ref != NULL)
2122
7.84k
  xmlFreeURI(ref);
2123
8.71k
    if (bas != NULL)
2124
4.98k
  xmlFreeURI(bas);
2125
8.71k
    if (res != NULL)
2126
4.39k
  xmlFreeURI(res);
2127
8.71k
    return(val);
2128
4.39k
}
2129
2130
/**
2131
 * xmlBuildRelativeURI:
2132
 * @URI:  the URI reference under consideration
2133
 * @base:  the base value
2134
 *
2135
 * Expresses the URI of the reference in terms relative to the
2136
 * base.  Some examples of this operation include:
2137
 *     base = "http://site1.com/docs/book1.html"
2138
 *        URI input                        URI returned
2139
 *     docs/pic1.gif                    pic1.gif
2140
 *     docs/img/pic1.gif                img/pic1.gif
2141
 *     img/pic1.gif                     ../img/pic1.gif
2142
 *     http://site1.com/docs/pic1.gif   pic1.gif
2143
 *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2144
 *
2145
 *     base = "docs/book1.html"
2146
 *        URI input                        URI returned
2147
 *     docs/pic1.gif                    pic1.gif
2148
 *     docs/img/pic1.gif                img/pic1.gif
2149
 *     img/pic1.gif                     ../img/pic1.gif
2150
 *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2151
 *
2152
 *
2153
 * Note: if the URI reference is really wierd or complicated, it may be
2154
 *       worthwhile to first convert it into a "nice" one by calling
2155
 *       xmlBuildURI (using 'base') before calling this routine,
2156
 *       since this routine (for reasonable efficiency) assumes URI has
2157
 *       already been through some validation.
2158
 *
2159
 * Returns a new URI string (to be freed by the caller) or NULL in case
2160
 * error.
2161
 */
2162
xmlChar *
2163
xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2164
0
{
2165
0
    xmlChar *val = NULL;
2166
0
    int ret;
2167
0
    int ix;
2168
0
    int nbslash = 0;
2169
0
    int len;
2170
0
    xmlURIPtr ref = NULL;
2171
0
    xmlURIPtr bas = NULL;
2172
0
    xmlChar *bptr, *uptr, *vptr;
2173
0
    int remove_path = 0;
2174
2175
0
    if ((URI == NULL) || (*URI == 0))
2176
0
  return NULL;
2177
2178
    /*
2179
     * First parse URI into a standard form
2180
     */
2181
0
    ref = xmlCreateURI ();
2182
0
    if (ref == NULL)
2183
0
  return NULL;
2184
    /* If URI not already in "relative" form */
2185
0
    if (URI[0] != '.') {
2186
0
  ret = xmlParseURIReference (ref, (const char *) URI);
2187
0
  if (ret != 0)
2188
0
      goto done;   /* Error in URI, return NULL */
2189
0
    } else
2190
0
  ref->path = (char *)xmlStrdup(URI);
2191
2192
    /*
2193
     * Next parse base into the same standard form
2194
     */
2195
0
    if ((base == NULL) || (*base == 0)) {
2196
0
  val = xmlStrdup (URI);
2197
0
  goto done;
2198
0
    }
2199
0
    bas = xmlCreateURI ();
2200
0
    if (bas == NULL)
2201
0
  goto done;
2202
0
    if (base[0] != '.') {
2203
0
  ret = xmlParseURIReference (bas, (const char *) base);
2204
0
  if (ret != 0)
2205
0
      goto done;   /* Error in base, return NULL */
2206
0
    } else
2207
0
  bas->path = (char *)xmlStrdup(base);
2208
2209
    /*
2210
     * If the scheme / server on the URI differs from the base,
2211
     * just return the URI
2212
     */
2213
0
    if ((ref->scheme != NULL) &&
2214
0
  ((bas->scheme == NULL) ||
2215
0
   (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2216
0
   (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2217
0
  val = xmlStrdup (URI);
2218
0
  goto done;
2219
0
    }
2220
0
    if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2221
0
  val = xmlStrdup(BAD_CAST "");
2222
0
  goto done;
2223
0
    }
2224
0
    if (bas->path == NULL) {
2225
0
  val = xmlStrdup((xmlChar *)ref->path);
2226
0
  goto done;
2227
0
    }
2228
0
    if (ref->path == NULL) {
2229
0
        ref->path = (char *) "/";
2230
0
  remove_path = 1;
2231
0
    }
2232
2233
    /*
2234
     * At this point (at last!) we can compare the two paths
2235
     *
2236
     * First we take care of the special case where either of the
2237
     * two path components may be missing (bug 316224)
2238
     */
2239
0
    if (bas->path == NULL) {
2240
0
  if (ref->path != NULL) {
2241
0
      uptr = (xmlChar *) ref->path;
2242
0
      if (*uptr == '/')
2243
0
    uptr++;
2244
      /* exception characters from xmlSaveUri */
2245
0
      val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2246
0
  }
2247
0
  goto done;
2248
0
    }
2249
0
    bptr = (xmlChar *)bas->path;
2250
0
    if (ref->path == NULL) {
2251
0
  for (ix = 0; bptr[ix] != 0; ix++) {
2252
0
      if (bptr[ix] == '/')
2253
0
    nbslash++;
2254
0
  }
2255
0
  uptr = NULL;
2256
0
  len = 1;  /* this is for a string terminator only */
2257
0
    } else {
2258
0
        xmlChar *rptr = (xmlChar *) ref->path;
2259
0
        int pos = 0;
2260
2261
        /*
2262
         * Next we compare the two strings and find where they first differ
2263
         */
2264
0
  if ((*rptr == '.') && (rptr[1] == '/'))
2265
0
            rptr += 2;
2266
0
  if ((*bptr == '.') && (bptr[1] == '/'))
2267
0
            bptr += 2;
2268
0
  else if ((*bptr == '/') && (*rptr != '/'))
2269
0
      bptr++;
2270
0
  while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2271
0
      pos++;
2272
2273
0
  if (bptr[pos] == rptr[pos]) {
2274
0
      val = xmlStrdup(BAD_CAST "");
2275
0
      goto done;    /* (I can't imagine why anyone would do this) */
2276
0
  }
2277
2278
  /*
2279
   * In URI, "back up" to the last '/' encountered.  This will be the
2280
   * beginning of the "unique" suffix of URI
2281
   */
2282
0
  ix = pos;
2283
0
  if ((rptr[ix] == '/') && (ix > 0))
2284
0
      ix--;
2285
0
  else if ((rptr[ix] == 0) && (ix > 1) && (rptr[ix - 1] == '/'))
2286
0
      ix -= 2;
2287
0
  for (; ix > 0; ix--) {
2288
0
      if (rptr[ix] == '/')
2289
0
    break;
2290
0
  }
2291
0
  if (ix == 0) {
2292
0
      uptr = (xmlChar *)rptr;
2293
0
  } else {
2294
0
      ix++;
2295
0
      uptr = (xmlChar *)&rptr[ix];
2296
0
  }
2297
2298
  /*
2299
   * In base, count the number of '/' from the differing point
2300
   */
2301
0
  if (bptr[pos] != rptr[pos]) {/* check for trivial URI == base */
2302
0
      for (; bptr[ix] != 0; ix++) {
2303
0
    if (bptr[ix] == '/')
2304
0
        nbslash++;
2305
0
      }
2306
0
  }
2307
0
  len = xmlStrlen (uptr) + 1;
2308
0
    }
2309
2310
0
    if (nbslash == 0) {
2311
0
  if (uptr != NULL)
2312
      /* exception characters from xmlSaveUri */
2313
0
      val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2314
0
  goto done;
2315
0
    }
2316
2317
    /*
2318
     * Allocate just enough space for the returned string -
2319
     * length of the remainder of the URI, plus enough space
2320
     * for the "../" groups, plus one for the terminator
2321
     */
2322
0
    val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2323
0
    if (val == NULL) {
2324
0
        xmlURIErrMemory("building relative URI\n");
2325
0
  goto done;
2326
0
    }
2327
0
    vptr = val;
2328
    /*
2329
     * Put in as many "../" as needed
2330
     */
2331
0
    for (; nbslash>0; nbslash--) {
2332
0
  *vptr++ = '.';
2333
0
  *vptr++ = '.';
2334
0
  *vptr++ = '/';
2335
0
    }
2336
    /*
2337
     * Finish up with the end of the URI
2338
     */
2339
0
    if (uptr != NULL) {
2340
0
        if ((vptr > val) && (len > 0) &&
2341
0
      (uptr[0] == '/') && (vptr[-1] == '/')) {
2342
0
      memcpy (vptr, uptr + 1, len - 1);
2343
0
      vptr[len - 2] = 0;
2344
0
  } else {
2345
0
      memcpy (vptr, uptr, len);
2346
0
      vptr[len - 1] = 0;
2347
0
  }
2348
0
    } else {
2349
0
  vptr[len - 1] = 0;
2350
0
    }
2351
2352
    /* escape the freshly-built path */
2353
0
    vptr = val;
2354
  /* exception characters from xmlSaveUri */
2355
0
    val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2356
0
    xmlFree(vptr);
2357
2358
0
done:
2359
    /*
2360
     * Free the working variables
2361
     */
2362
0
    if (remove_path != 0)
2363
0
        ref->path = NULL;
2364
0
    if (ref != NULL)
2365
0
  xmlFreeURI (ref);
2366
0
    if (bas != NULL)
2367
0
  xmlFreeURI (bas);
2368
2369
0
    return val;
2370
0
}
2371
2372
/**
2373
 * xmlCanonicPath:
2374
 * @path:  the resource locator in a filesystem notation
2375
 *
2376
 * Constructs a canonic path from the specified path.
2377
 *
2378
 * Returns a new canonic path, or a duplicate of the path parameter if the
2379
 * construction fails. The caller is responsible for freeing the memory occupied
2380
 * by the returned string. If there is insufficient memory available, or the
2381
 * argument is NULL, the function returns NULL.
2382
 */
2383
/*
 * IS_WINDOWS_PATH:
 * @p:  a string pointer, may be NULL
 *
 * True when @p starts with a drive specification: an ASCII letter, a
 * colon, then a slash or a backslash (as in "C:\dir" or "c:/dir").
 * The argument is fully parenthesized so any pointer expression can be
 * passed, but it is evaluated several times: avoid side effects.
 */
#define IS_WINDOWS_PATH(p)					\
	(((p) != NULL) &&					\
	 ((((p)[0] >= 'a') && ((p)[0] <= 'z')) ||		\
	  (((p)[0] >= 'A') && ((p)[0] <= 'Z'))) &&		\
	 ((p)[1] == ':') && (((p)[2] == '/') || ((p)[2] == '\\')))
2388
xmlChar *
2389
xmlCanonicPath(const xmlChar *path)
2390
96.8k
{
2391
/*
2392
 * For Windows implementations, additional work needs to be done to
2393
 * replace backslashes in pathnames with "forward slashes"
2394
 */
2395
#if defined(_WIN32) && !defined(__CYGWIN__)
2396
    int len = 0;
2397
    char *p = NULL;
2398
#endif
2399
96.8k
    xmlURIPtr uri;
2400
96.8k
    xmlChar *ret;
2401
96.8k
    const xmlChar *absuri;
2402
2403
96.8k
    if (path == NULL)
2404
0
  return(NULL);
2405
2406
#if defined(_WIN32)
2407
    /*
2408
     * We must not change the backslashes to slashes if the the path
2409
     * starts with \\?\
2410
     * Those paths can be up to 32k characters long.
2411
     * Was added specifically for OpenOffice, those paths can't be converted
2412
     * to URIs anyway.
2413
     */
2414
    if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2415
        (path[3] == '\\') )
2416
  return xmlStrdup((const xmlChar *) path);
2417
#endif
2418
2419
  /* sanitize filename starting with // so it can be used as URI */
2420
96.8k
    if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2421
0
        path++;
2422
2423
96.8k
    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2424
96.8k
  xmlFreeURI(uri);
2425
96.8k
  return xmlStrdup(path);
2426
96.8k
    }
2427
2428
    /* Check if this is an "absolute uri" */
2429
0
    absuri = xmlStrstr(path, BAD_CAST "://");
2430
0
    if (absuri != NULL) {
2431
0
        int l, j;
2432
0
  unsigned char c;
2433
0
  xmlChar *escURI;
2434
2435
        /*
2436
   * this looks like an URI where some parts have not been
2437
   * escaped leading to a parsing problem.  Check that the first
2438
   * part matches a protocol.
2439
   */
2440
0
  l = absuri - path;
2441
  /* Bypass if first part (part before the '://') is > 20 chars */
2442
0
  if ((l <= 0) || (l > 20))
2443
0
      goto path_processing;
2444
  /* Bypass if any non-alpha characters are present in first part */
2445
0
  for (j = 0;j < l;j++) {
2446
0
      c = path[j];
2447
0
      if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2448
0
          goto path_processing;
2449
0
  }
2450
2451
  /* Escape all except the characters specified in the supplied path */
2452
0
        escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2453
0
  if (escURI != NULL) {
2454
      /* Try parsing the escaped path */
2455
0
      uri = xmlParseURI((const char *) escURI);
2456
      /* If successful, return the escaped string */
2457
0
      if (uri != NULL) {
2458
0
          xmlFreeURI(uri);
2459
0
    return escURI;
2460
0
      }
2461
0
            xmlFree(escURI);
2462
0
  }
2463
0
    }
2464
2465
0
path_processing:
2466
/* For Windows implementations, replace backslashes with 'forward slashes' */
2467
#if defined(_WIN32) && !defined(__CYGWIN__)
2468
    /*
2469
     * Create a URI structure
2470
     */
2471
    uri = xmlCreateURI();
2472
    if (uri == NULL) {    /* Guard against 'out of memory' */
2473
        return(NULL);
2474
    }
2475
2476
    len = xmlStrlen(path);
2477
    if ((len > 2) && IS_WINDOWS_PATH(path)) {
2478
        /* make the scheme 'file' */
2479
  uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2480
  /* allocate space for leading '/' + path + string terminator */
2481
  uri->path = xmlMallocAtomic(len + 2);
2482
  if (uri->path == NULL) {
2483
      xmlFreeURI(uri);  /* Guard agains 'out of memory' */
2484
      return(NULL);
2485
  }
2486
  /* Put in leading '/' plus path */
2487
  uri->path[0] = '/';
2488
  p = uri->path + 1;
2489
  strncpy(p, (char *) path, len + 1);
2490
    } else {
2491
  uri->path = (char *) xmlStrdup(path);
2492
  if (uri->path == NULL) {
2493
      xmlFreeURI(uri);
2494
      return(NULL);
2495
  }
2496
  p = uri->path;
2497
    }
2498
    /* Now change all occurences of '\' to '/' */
2499
    while (*p != '\0') {
2500
  if (*p == '\\')
2501
      *p = '/';
2502
  p++;
2503
    }
2504
2505
    if (uri->scheme == NULL) {
2506
  ret = xmlStrdup((const xmlChar *) uri->path);
2507
    } else {
2508
  ret = xmlSaveUri(uri);
2509
    }
2510
2511
    xmlFreeURI(uri);
2512
#else
2513
0
    ret = xmlStrdup((const xmlChar *) path);
2514
0
#endif
2515
0
    return(ret);
2516
0
}
2517
2518
/**
2519
 * xmlPathToURI:
2520
 * @path:  the resource locator in a filesystem notation
2521
 *
2522
 * Constructs an URI expressing the existing path
2523
 *
2524
 * Returns a new URI, or a duplicate of the path parameter if the
2525
 * construction fails. The caller is responsible for freeing the memory
2526
 * occupied by the returned string. If there is insufficient memory available,
2527
 * or the argument is NULL, the function returns NULL.
2528
 */
2529
xmlChar *
2530
xmlPathToURI(const xmlChar *path)
2531
94.4k
{
2532
94.4k
    xmlURIPtr uri;
2533
94.4k
    xmlURI temp;
2534
94.4k
    xmlChar *ret, *cal;
2535
2536
94.4k
    if (path == NULL)
2537
0
        return(NULL);
2538
2539
94.4k
    if ((uri = xmlParseURI((const char *) path)) != NULL) {
2540
94.4k
  xmlFreeURI(uri);
2541
94.4k
  return xmlStrdup(path);
2542
94.4k
    }
2543
0
    cal = xmlCanonicPath(path);
2544
0
    if (cal == NULL)
2545
0
        return(NULL);
2546
#if defined(_WIN32) && !defined(__CYGWIN__)
2547
    /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2548
       If 'cal' is a valid URI allready then we are done here, as continuing would make
2549
       it invalid. */
2550
    if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2551
  xmlFreeURI(uri);
2552
  return cal;
2553
    }
2554
    /* 'cal' can contain a relative path with backslashes. If that is processed
2555
       by xmlSaveURI, they will be escaped and the external entity loader machinery
2556
       will fail. So convert them to slashes. Misuse 'ret' for walking. */
2557
    ret = cal;
2558
    while (*ret != '\0') {
2559
  if (*ret == '\\')
2560
      *ret = '/';
2561
  ret++;
2562
    }
2563
#endif
2564
0
    memset(&temp, 0, sizeof(temp));
2565
0
    temp.path = (char *) cal;
2566
0
    ret = xmlSaveUri(&temp);
2567
0
    xmlFree(cal);
2568
0
    return(ret);
2569
0
}
2570
#define bottom_uri
2571
#include "elfgcchack.h"