Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /**  | 
2  |  |  * uri.c: set of generic URI related routines  | 
3  |  |  *  | 
4  |  |  * Reference: RFCs 3986, 2732 and 2373  | 
5  |  |  *  | 
6  |  |  * See Copyright for the status of this software.  | 
7  |  |  *  | 
8  |  |  * daniel@veillard.com  | 
9  |  |  */  | 
10  |  |  | 
11  |  | #define IN_LIBXML  | 
12  |  | #include "libxml.h"  | 
13  |  |  | 
14  |  | #include <limits.h>  | 
15  |  | #include <string.h>  | 
16  |  |  | 
17  |  | #include <libxml/xmlmemory.h>  | 
18  |  | #include <libxml/uri.h>  | 
19  |  | #include <libxml/globals.h>  | 
20  |  | #include <libxml/xmlerror.h>  | 
21  |  |  | 
22  |  | #include "private/error.h"  | 
23  |  |  | 
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short except for the
 * data URI scheme as defined in RFC 2397. Even for data URI the usual
 * maximum size before hitting random practical limits is around 64 KB
 * and 4KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The replacement list is parenthesized so the macro behaves as a single
 * value inside any larger expression (division, modulo, ...).
 */
#define MAX_URI_LENGTH (1024 * 1024)

/*
 * Special values of the port field of a URI.
 * PORT_EMPTY is the value meaning that no port was set; PORT_EMPTY_SERVER
 * is stored instead when an authority was present but had neither a
 * server nor a port.
 */
#define PORT_EMPTY           0
#define PORT_EMPTY_SERVER   (-1)
40  |  |  | 
41  |  | static void  | 
42  |  | xmlURIErrMemory(const char *extra)  | 
43  | 0  | { | 
44  | 0  |     if (extra)  | 
45  | 0  |         __xmlRaiseError(NULL, NULL, NULL,  | 
46  | 0  |                         NULL, NULL, XML_FROM_URI,  | 
47  | 0  |                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,  | 
48  | 0  |                         extra, NULL, NULL, 0, 0,  | 
49  | 0  |                         "Memory allocation failed : %s\n", extra);  | 
50  | 0  |     else  | 
51  | 0  |         __xmlRaiseError(NULL, NULL, NULL,  | 
52  | 0  |                         NULL, NULL, XML_FROM_URI,  | 
53  | 0  |                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,  | 
54  | 0  |                         NULL, NULL, NULL, 0, 0,  | 
55  | 0  |                         "Memory allocation failed\n");  | 
56  | 0  | }  | 
57  |  |  | 
58  |  | static void xmlCleanURI(xmlURIPtr uri);  | 
59  |  |  | 
/*
 * Character classes from the old RFC 2396 grammar, used in legacy
 * handling code.
 *
 * All macros below except IS_UNWISE take a character value; IS_UNWISE
 * takes a pointer to the character. Arguments may be evaluated more than
 * once, so they must not have side effects.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 *
 * Any IS_DIGIT definition already in scope is dropped first.
 */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||                    \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the other classes, @p is a pointer to the character to test.
 */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
127  |  |  | 
/*
 * Advance the pointer lvalue @p to the next character: by three bytes
 * when it points at '%' (an escaped sequence), by one byte otherwise.
 *
 * The macro does not check that two more characters follow the '%';
 * the caller must have validated the escape beforehand (for instance
 * with ISA_PCT_ENCODED). @p is evaluated more than once and must be a
 * modifiable lvalue without side effects.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
133  |  |  | 
134  |  | /*  | 
135  |  |  * Productions from the spec.  | 
136  |  |  *  | 
137  |  |  *    authority     = server | reg_name  | 
138  |  |  *    reg_name      = 1*( unreserved | escaped | "$" | "," |  | 
139  |  |  *                        ";" | ":" | "@" | "&" | "=" | "+" )  | 
140  |  |  *  | 
141  |  |  * path          = [ abs_path | opaque_part ]  | 
142  |  |  */  | 
143  |  |  | 
/*
 * Wrapper around xmlStrndup() taking a plain character pointer @s and a
 * length @n and yielding a char * result. The whole expansion is
 * parenthesized so the result cast cannot bind to surrounding operators.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
145  |  |  | 
146  |  | /************************************************************************  | 
147  |  |  *                  *  | 
148  |  |  *                         RFC 3986 parser        *  | 
149  |  |  *                  *  | 
150  |  |  ************************************************************************/  | 
151  |  |  | 
/*
 * RFC 3986 character classes. Every macro takes a pointer @p to the
 * character to test and may evaluate @p more than once, so the argument
 * must not have side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The second and third characters are only read once the preceding one
 * matched, so the test never reads past a terminating 0. @p is
 * parenthesized before the offset is added so that any pointer
 * expression can be passed.
 */
#define ISA_PCT_ENCODED(p)                                              \
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
      ((*(p) == ':')) || ((*(p) == '@')))
201  |  |  | 
202  |  | /**  | 
203  |  |  * xmlParse3986Scheme:  | 
204  |  |  * @uri:  pointer to an URI structure  | 
205  |  |  * @str:  pointer to the string to analyze  | 
206  |  |  *  | 
207  |  |  * Parse an URI scheme  | 
208  |  |  *  | 
209  |  |  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )  | 
210  |  |  *  | 
211  |  |  * Returns 0 or the error code  | 
212  |  |  */  | 
213  |  | static int  | 
214  | 7.14M  | xmlParse3986Scheme(xmlURIPtr uri, const char **str) { | 
215  | 7.14M  |     const char *cur;  | 
216  |  |  | 
217  | 7.14M  |     if (str == NULL)  | 
218  | 0  |   return(-1);  | 
219  |  |  | 
220  | 7.14M  |     cur = *str;  | 
221  | 7.14M  |     if (!ISA_ALPHA(cur))  | 
222  | 917k  |   return(2);  | 
223  | 6.23M  |     cur++;  | 
224  | 25.1M  |     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||  | 
225  | 25.1M  |            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;  | 
226  | 6.23M  |     if (uri != NULL) { | 
227  | 6.23M  |   if (uri->scheme != NULL) xmlFree(uri->scheme);  | 
228  | 6.23M  |   uri->scheme = STRNDUP(*str, cur - *str);  | 
229  | 6.23M  |     }  | 
230  | 6.23M  |     *str = cur;  | 
231  | 6.23M  |     return(0);  | 
232  | 7.14M  | }  | 
233  |  |  | 
234  |  | /**  | 
235  |  |  * xmlParse3986Fragment:  | 
236  |  |  * @uri:  pointer to an URI structure  | 
237  |  |  * @str:  pointer to the string to analyze  | 
238  |  |  *  | 
239  |  |  * Parse the query part of an URI  | 
240  |  |  *  | 
241  |  |  * fragment      = *( pchar / "/" / "?" )  | 
242  |  |  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'  | 
243  |  |  *       in the fragment identifier but this is used very broadly for  | 
244  |  |  *       xpointer scheme selection, so we are allowing it here to not break  | 
245  |  |  *       for example all the DocBook processing chains.  | 
246  |  |  *  | 
247  |  |  * Returns 0 or the error code  | 
248  |  |  */  | 
249  |  | static int  | 
250  |  | xmlParse3986Fragment(xmlURIPtr uri, const char **str)  | 
251  | 126k  | { | 
252  | 126k  |     const char *cur;  | 
253  |  |  | 
254  | 126k  |     if (str == NULL)  | 
255  | 0  |         return (-1);  | 
256  |  |  | 
257  | 126k  |     cur = *str;  | 
258  |  |  | 
259  | 7.80M  |     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||  | 
260  | 7.80M  |            (*cur == '[') || (*cur == ']') ||  | 
261  | 7.80M  |            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))  | 
262  | 7.68M  |         NEXT(cur);  | 
263  | 126k  |     if (uri != NULL) { | 
264  | 126k  |         if (uri->fragment != NULL)  | 
265  | 0  |             xmlFree(uri->fragment);  | 
266  | 126k  |   if (uri->cleanup & 2)  | 
267  | 0  |       uri->fragment = STRNDUP(*str, cur - *str);  | 
268  | 126k  |   else  | 
269  | 126k  |       uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);  | 
270  | 126k  |     }  | 
271  | 126k  |     *str = cur;  | 
272  | 126k  |     return (0);  | 
273  | 126k  | }  | 
274  |  |  | 
275  |  | /**  | 
276  |  |  * xmlParse3986Query:  | 
277  |  |  * @uri:  pointer to an URI structure  | 
278  |  |  * @str:  pointer to the string to analyze  | 
279  |  |  *  | 
280  |  |  * Parse the query part of an URI  | 
281  |  |  *  | 
282  |  |  * query = *uric  | 
283  |  |  *  | 
284  |  |  * Returns 0 or the error code  | 
285  |  |  */  | 
286  |  | static int  | 
287  |  | xmlParse3986Query(xmlURIPtr uri, const char **str)  | 
288  | 176k  | { | 
289  | 176k  |     const char *cur;  | 
290  |  |  | 
291  | 176k  |     if (str == NULL)  | 
292  | 0  |         return (-1);  | 
293  |  |  | 
294  | 176k  |     cur = *str;  | 
295  |  |  | 
296  | 10.3M  |     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||  | 
297  | 10.3M  |            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))  | 
298  | 10.1M  |         NEXT(cur);  | 
299  | 176k  |     if (uri != NULL) { | 
300  | 176k  |         if (uri->query != NULL)  | 
301  | 0  |             xmlFree(uri->query);  | 
302  | 176k  |   if (uri->cleanup & 2)  | 
303  | 0  |       uri->query = STRNDUP(*str, cur - *str);  | 
304  | 176k  |   else  | 
305  | 176k  |       uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);  | 
306  |  |  | 
307  |  |   /* Save the raw bytes of the query as well.  | 
308  |  |    * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114  | 
309  |  |    */  | 
310  | 176k  |   if (uri->query_raw != NULL)  | 
311  | 0  |       xmlFree (uri->query_raw);  | 
312  | 176k  |   uri->query_raw = STRNDUP (*str, cur - *str);  | 
313  | 176k  |     }  | 
314  | 176k  |     *str = cur;  | 
315  | 176k  |     return (0);  | 
316  | 176k  | }  | 
317  |  |  | 
318  |  | /**  | 
319  |  |  * xmlParse3986Port:  | 
320  |  |  * @uri:  pointer to an URI structure  | 
321  |  |  * @str:  the string to analyze  | 
322  |  |  *  | 
323  |  |  * Parse a port part and fills in the appropriate fields  | 
324  |  |  * of the @uri structure  | 
325  |  |  *  | 
326  |  |  * port          = *DIGIT  | 
327  |  |  *  | 
328  |  |  * Returns 0 or the error code  | 
329  |  |  */  | 
330  |  | static int  | 
331  |  | xmlParse3986Port(xmlURIPtr uri, const char **str)  | 
332  | 23.4k  | { | 
333  | 23.4k  |     const char *cur = *str;  | 
334  | 23.4k  |     int port = 0;  | 
335  |  |  | 
336  | 23.4k  |     if (ISA_DIGIT(cur)) { | 
337  | 38.4k  |   while (ISA_DIGIT(cur)) { | 
338  | 31.0k  |             int digit = *cur - '0';  | 
339  |  |  | 
340  | 31.0k  |             if (port > INT_MAX / 10)  | 
341  | 1.06k  |                 return(1);  | 
342  | 30.0k  |             port *= 10;  | 
343  | 30.0k  |             if (port > INT_MAX - digit)  | 
344  | 0  |                 return(1);  | 
345  | 30.0k  |       port += digit;  | 
346  |  |  | 
347  | 30.0k  |       cur++;  | 
348  | 30.0k  |   }  | 
349  | 7.34k  |   if (uri != NULL)  | 
350  | 7.34k  |       uri->port = port;  | 
351  | 7.34k  |   *str = cur;  | 
352  | 7.34k  |   return(0);  | 
353  | 8.40k  |     }  | 
354  | 15.0k  |     return(1);  | 
355  | 23.4k  | }  | 
356  |  |  | 
357  |  | /**  | 
358  |  |  * xmlParse3986Userinfo:  | 
359  |  |  * @uri:  pointer to an URI structure  | 
360  |  |  * @str:  the string to analyze  | 
361  |  |  *  | 
362  |  |  * Parse an user information part and fills in the appropriate fields  | 
363  |  |  * of the @uri structure  | 
364  |  |  *  | 
365  |  |  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )  | 
366  |  |  *  | 
367  |  |  * Returns 0 or the error code  | 
368  |  |  */  | 
369  |  | static int  | 
370  |  | xmlParse3986Userinfo(xmlURIPtr uri, const char **str)  | 
371  | 679k  | { | 
372  | 679k  |     const char *cur;  | 
373  |  |  | 
374  | 679k  |     cur = *str;  | 
375  | 13.9M  |     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||  | 
376  | 13.9M  |            ISA_SUB_DELIM(cur) || (*cur == ':'))  | 
377  | 13.3M  |   NEXT(cur);  | 
378  | 679k  |     if (*cur == '@') { | 
379  | 67.7k  |   if (uri != NULL) { | 
380  | 67.7k  |       if (uri->user != NULL) xmlFree(uri->user);  | 
381  | 67.7k  |       if (uri->cleanup & 2)  | 
382  | 0  |     uri->user = STRNDUP(*str, cur - *str);  | 
383  | 67.7k  |       else  | 
384  | 67.7k  |     uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);  | 
385  | 67.7k  |   }  | 
386  | 67.7k  |   *str = cur;  | 
387  | 67.7k  |   return(0);  | 
388  | 67.7k  |     }  | 
389  | 611k  |     return(1);  | 
390  | 679k  | }  | 
391  |  |  | 
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are consumed; the caller is
 * responsible for checking what follows. Multi-digit values with a
 * leading zero and three-digit values above 255 are rejected.
 * On failure *@str is left untouched.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    /*
     * cur[1] is only a digit or the first character after the octet, and
     * cur[2] is only read once cur[1] is known to be a digit, so the
     * terminating 0 of the string is never passed.
     */
    if ((cur[1] < '0') || (cur[1] > '9')) {
        len = 1;                                /* 0-9 */
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        if (cur[0] == '0')                      /* no leading zero */
            return(1);
        len = 2;                                /* 10-99 */
    } else if (cur[0] == '1') {
        len = 3;                                /* 100-199 */
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        len = 3;                                /* 200-249 */
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        len = 3;                                /* 250-255 */
    } else {
        return(1);
    }

    *str = cur + len;
    return(0);
}
429  |  | /**  | 
430  |  |  * xmlParse3986Host:  | 
431  |  |  * @uri:  pointer to an URI structure  | 
432  |  |  * @str:  the string to analyze  | 
433  |  |  *  | 
434  |  |  * Parse an host part and fills in the appropriate fields  | 
435  |  |  * of the @uri structure  | 
436  |  |  *  | 
437  |  |  * host          = IP-literal / IPv4address / reg-name  | 
438  |  |  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"  | 
439  |  |  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet  | 
440  |  |  * reg-name      = *( unreserved / pct-encoded / sub-delims )  | 
441  |  |  *  | 
442  |  |  * Returns 0 or the error code  | 
443  |  |  */  | 
444  |  | static int  | 
445  |  | xmlParse3986Host(xmlURIPtr uri, const char **str)  | 
446  | 679k  | { | 
447  | 679k  |     const char *cur = *str;  | 
448  | 679k  |     const char *host;  | 
449  |  |  | 
450  | 679k  |     host = cur;  | 
451  |  |     /*  | 
452  |  |      * IPv6 and future addressing scheme are enclosed between brackets  | 
453  |  |      */  | 
454  | 679k  |     if (*cur == '[') { | 
455  | 9.83k  |         cur++;  | 
456  | 751k  |   while ((*cur != ']') && (*cur != 0))  | 
457  | 741k  |       cur++;  | 
458  | 9.83k  |   if (*cur != ']')  | 
459  | 6.07k  |       return(1);  | 
460  | 3.76k  |   cur++;  | 
461  | 3.76k  |   goto found;  | 
462  | 9.83k  |     }  | 
463  |  |     /*  | 
464  |  |      * try to parse an IPv4  | 
465  |  |      */  | 
466  | 669k  |     if (ISA_DIGIT(cur)) { | 
467  | 5.59k  |         if (xmlParse3986DecOctet(&cur) != 0)  | 
468  | 185  |       goto not_ipv4;  | 
469  | 5.40k  |   if (*cur != '.')  | 
470  | 3.80k  |       goto not_ipv4;  | 
471  | 1.59k  |   cur++;  | 
472  | 1.59k  |         if (xmlParse3986DecOctet(&cur) != 0)  | 
473  | 1.58k  |       goto not_ipv4;  | 
474  | 16  |   if (*cur != '.')  | 
475  | 16  |       goto not_ipv4;  | 
476  | 0  |         if (xmlParse3986DecOctet(&cur) != 0)  | 
477  | 0  |       goto not_ipv4;  | 
478  | 0  |   if (*cur != '.')  | 
479  | 0  |       goto not_ipv4;  | 
480  | 0  |         if (xmlParse3986DecOctet(&cur) != 0)  | 
481  | 0  |       goto not_ipv4;  | 
482  | 0  |   goto found;  | 
483  | 5.59k  | not_ipv4:  | 
484  | 5.59k  |         cur = *str;  | 
485  | 5.59k  |     }  | 
486  |  |     /*  | 
487  |  |      * then this should be a hostname which can be empty  | 
488  |  |      */  | 
489  | 10.8M  |     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))  | 
490  | 10.1M  |         NEXT(cur);  | 
491  | 673k  | found:  | 
492  | 673k  |     if (uri != NULL) { | 
493  | 673k  |   if (uri->authority != NULL) xmlFree(uri->authority);  | 
494  | 673k  |   uri->authority = NULL;  | 
495  | 673k  |   if (uri->server != NULL) xmlFree(uri->server);  | 
496  | 673k  |   if (cur != host) { | 
497  | 559k  |       if (uri->cleanup & 2)  | 
498  | 0  |     uri->server = STRNDUP(host, cur - host);  | 
499  | 559k  |       else  | 
500  | 559k  |     uri->server = xmlURIUnescapeString(host, cur - host, NULL);  | 
501  | 559k  |   } else  | 
502  | 113k  |       uri->server = NULL;  | 
503  | 673k  |     }  | 
504  | 673k  |     *str = cur;  | 
505  | 673k  |     return(0);  | 
506  | 669k  | }  | 
507  |  |  | 
508  |  | /**  | 
509  |  |  * xmlParse3986Authority:  | 
510  |  |  * @uri:  pointer to an URI structure  | 
511  |  |  * @str:  the string to analyze  | 
512  |  |  *  | 
513  |  |  * Parse an authority part and fills in the appropriate fields  | 
514  |  |  * of the @uri structure  | 
515  |  |  *  | 
516  |  |  * authority     = [ userinfo "@" ] host [ ":" port ]  | 
517  |  |  *  | 
518  |  |  * Returns 0 or the error code  | 
519  |  |  */  | 
520  |  | static int  | 
521  |  | xmlParse3986Authority(xmlURIPtr uri, const char **str)  | 
522  | 679k  | { | 
523  | 679k  |     const char *cur;  | 
524  | 679k  |     int ret;  | 
525  |  |  | 
526  | 679k  |     cur = *str;  | 
527  |  |     /*  | 
528  |  |      * try to parse an userinfo and check for the trailing @  | 
529  |  |      */  | 
530  | 679k  |     ret = xmlParse3986Userinfo(uri, &cur);  | 
531  | 679k  |     if ((ret != 0) || (*cur != '@'))  | 
532  | 611k  |         cur = *str;  | 
533  | 67.7k  |     else  | 
534  | 67.7k  |         cur++;  | 
535  | 679k  |     ret = xmlParse3986Host(uri, &cur);  | 
536  | 679k  |     if (ret != 0) return(ret);  | 
537  | 673k  |     if (*cur == ':') { | 
538  | 23.4k  |         cur++;  | 
539  | 23.4k  |         ret = xmlParse3986Port(uri, &cur);  | 
540  | 23.4k  |   if (ret != 0) return(ret);  | 
541  | 23.4k  |     }  | 
542  | 656k  |     *str = cur;  | 
543  | 656k  |     return(0);  | 
544  | 673k  | }  | 
545  |  |  | 
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip a path segment and move *@str past it
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * The segment stops at the first character which is not a pchar or which
 * is equal to @forbid. @empty is only consulted when the very first
 * character is not a pchar; a segment starting with the @forbid character
 * is reported as success with nothing consumed.
 *
 * Returns 0 on success, 1 if the segment is empty and @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *pos = *str;

    if (!ISA_PCHAR(pos))
        return(empty ? 0 : 1);

    while (ISA_PCHAR(pos) && (*pos != forbid))
        NEXT(pos);

    *str = pos;
    return (0);
}
578  |  |  | 
579  |  | /**  | 
580  |  |  * xmlParse3986PathAbEmpty:  | 
581  |  |  * @uri:  pointer to an URI structure  | 
582  |  |  * @str:  the string to analyze  | 
583  |  |  *  | 
584  |  |  * Parse an path absolute or empty and fills in the appropriate fields  | 
585  |  |  * of the @uri structure  | 
586  |  |  *  | 
587  |  |  * path-abempty  = *( "/" segment )  | 
588  |  |  *  | 
589  |  |  * Returns 0 or the error code  | 
590  |  |  */  | 
591  |  | static int  | 
592  |  | xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)  | 
593  | 656k  | { | 
594  | 656k  |     const char *cur;  | 
595  | 656k  |     int ret;  | 
596  |  |  | 
597  | 656k  |     cur = *str;  | 
598  |  |  | 
599  | 1.63M  |     while (*cur == '/') { | 
600  | 973k  |         cur++;  | 
601  | 973k  |   ret = xmlParse3986Segment(&cur, 0, 1);  | 
602  | 973k  |   if (ret != 0) return(ret);  | 
603  | 973k  |     }  | 
604  | 656k  |     if (uri != NULL) { | 
605  | 656k  |   if (uri->path != NULL) xmlFree(uri->path);  | 
606  | 656k  |         if (*str != cur) { | 
607  | 283k  |             if (uri->cleanup & 2)  | 
608  | 0  |                 uri->path = STRNDUP(*str, cur - *str);  | 
609  | 283k  |             else  | 
610  | 283k  |                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);  | 
611  | 373k  |         } else { | 
612  | 373k  |             uri->path = NULL;  | 
613  | 373k  |         }  | 
614  | 656k  |     }  | 
615  | 656k  |     *str = cur;  | 
616  | 656k  |     return (0);  | 
617  | 656k  | }  | 
618  |  |  | 
619  |  | /**  | 
620  |  |  * xmlParse3986PathAbsolute:  | 
621  |  |  * @uri:  pointer to an URI structure  | 
622  |  |  * @str:  the string to analyze  | 
623  |  |  *  | 
624  |  |  * Parse an path absolute and fills in the appropriate fields  | 
625  |  |  * of the @uri structure  | 
626  |  |  *  | 
627  |  |  * path-absolute = "/" [ segment-nz *( "/" segment ) ]  | 
628  |  |  *  | 
629  |  |  * Returns 0 or the error code  | 
630  |  |  */  | 
631  |  | static int  | 
632  |  | xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)  | 
633  | 70.2k  | { | 
634  | 70.2k  |     const char *cur;  | 
635  | 70.2k  |     int ret;  | 
636  |  |  | 
637  | 70.2k  |     cur = *str;  | 
638  |  |  | 
639  | 70.2k  |     if (*cur != '/')  | 
640  | 0  |         return(1);  | 
641  | 70.2k  |     cur++;  | 
642  | 70.2k  |     ret = xmlParse3986Segment(&cur, 0, 0);  | 
643  | 70.2k  |     if (ret == 0) { | 
644  | 114k  |   while (*cur == '/') { | 
645  | 62.7k  |       cur++;  | 
646  | 62.7k  |       ret = xmlParse3986Segment(&cur, 0, 1);  | 
647  | 62.7k  |       if (ret != 0) return(ret);  | 
648  | 62.7k  |   }  | 
649  | 52.1k  |     }  | 
650  | 70.2k  |     if (uri != NULL) { | 
651  | 70.2k  |   if (uri->path != NULL) xmlFree(uri->path);  | 
652  | 70.2k  |         if (cur != *str) { | 
653  | 70.2k  |             if (uri->cleanup & 2)  | 
654  | 0  |                 uri->path = STRNDUP(*str, cur - *str);  | 
655  | 70.2k  |             else  | 
656  | 70.2k  |                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);  | 
657  | 70.2k  |         } else { | 
658  | 0  |             uri->path = NULL;  | 
659  | 0  |         }  | 
660  | 70.2k  |     }  | 
661  | 70.2k  |     *str = cur;  | 
662  | 70.2k  |     return (0);  | 
663  | 70.2k  | }  | 
664  |  |  | 
665  |  | /**  | 
666  |  |  * xmlParse3986PathRootless:  | 
667  |  |  * @uri:  pointer to an URI structure  | 
668  |  |  * @str:  the string to analyze  | 
669  |  |  *  | 
670  |  |  * Parse an path without root and fills in the appropriate fields  | 
671  |  |  * of the @uri structure  | 
672  |  |  *  | 
673  |  |  * path-rootless = segment-nz *( "/" segment )  | 
674  |  |  *  | 
675  |  |  * Returns 0 or the error code  | 
676  |  |  */  | 
677  |  | static int  | 
678  |  | xmlParse3986PathRootless(xmlURIPtr uri, const char **str)  | 
679  | 371k  | { | 
680  | 371k  |     const char *cur;  | 
681  | 371k  |     int ret;  | 
682  |  |  | 
683  | 371k  |     cur = *str;  | 
684  |  |  | 
685  | 371k  |     ret = xmlParse3986Segment(&cur, 0, 0);  | 
686  | 371k  |     if (ret != 0) return(ret);  | 
687  | 457k  |     while (*cur == '/') { | 
688  | 86.5k  |         cur++;  | 
689  | 86.5k  |   ret = xmlParse3986Segment(&cur, 0, 1);  | 
690  | 86.5k  |   if (ret != 0) return(ret);  | 
691  | 86.5k  |     }  | 
692  | 371k  |     if (uri != NULL) { | 
693  | 371k  |   if (uri->path != NULL) xmlFree(uri->path);  | 
694  | 371k  |         if (cur != *str) { | 
695  | 371k  |             if (uri->cleanup & 2)  | 
696  | 0  |                 uri->path = STRNDUP(*str, cur - *str);  | 
697  | 371k  |             else  | 
698  | 371k  |                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);  | 
699  | 371k  |         } else { | 
700  | 0  |             uri->path = NULL;  | 
701  | 0  |         }  | 
702  | 371k  |     }  | 
703  | 371k  |     *str = cur;  | 
704  | 371k  |     return (0);  | 
705  | 371k  | }  | 
706  |  |  | 
707  |  | /**  | 
708  |  |  * xmlParse3986PathNoScheme:  | 
709  |  |  * @uri:  pointer to an URI structure  | 
710  |  |  * @str:  the string to analyze  | 
711  |  |  *  | 
712  |  |  * Parse an path which is not a scheme and fills in the appropriate fields  | 
713  |  |  * of the @uri structure  | 
714  |  |  *  | 
715  |  |  * path-noscheme = segment-nz-nc *( "/" segment )  | 
716  |  |  *  | 
717  |  |  * Returns 0 or the error code  | 
718  |  |  */  | 
719  |  | static int  | 
720  |  | xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)  | 
721  | 5.87M  | { | 
722  | 5.87M  |     const char *cur;  | 
723  | 5.87M  |     int ret;  | 
724  |  |  | 
725  | 5.87M  |     cur = *str;  | 
726  |  |  | 
727  | 5.87M  |     ret = xmlParse3986Segment(&cur, ':', 0);  | 
728  | 5.87M  |     if (ret != 0) return(ret);  | 
729  | 6.63M  |     while (*cur == '/') { | 
730  | 764k  |         cur++;  | 
731  | 764k  |   ret = xmlParse3986Segment(&cur, 0, 1);  | 
732  | 764k  |   if (ret != 0) return(ret);  | 
733  | 764k  |     }  | 
734  | 5.87M  |     if (uri != NULL) { | 
735  | 5.87M  |   if (uri->path != NULL) xmlFree(uri->path);  | 
736  | 5.87M  |         if (cur != *str) { | 
737  | 5.86M  |             if (uri->cleanup & 2)  | 
738  | 0  |                 uri->path = STRNDUP(*str, cur - *str);  | 
739  | 5.86M  |             else  | 
740  | 5.86M  |                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);  | 
741  | 5.86M  |         } else { | 
742  | 5.77k  |             uri->path = NULL;  | 
743  | 5.77k  |         }  | 
744  | 5.87M  |     }  | 
745  | 5.87M  |     *str = cur;  | 
746  | 5.87M  |     return (0);  | 
747  | 5.87M  | }  | 
748  |  |  | 
749  |  | /**  | 
750  |  |  * xmlParse3986HierPart:  | 
751  |  |  * @uri:  pointer to an URI structure  | 
752  |  |  * @str:  the string to analyze  | 
753  |  |  *  | 
754  |  |  * Parse an hierarchical part and fills in the appropriate fields  | 
755  |  |  * of the @uri structure  | 
756  |  |  *  | 
757  |  |  * hier-part     = "//" authority path-abempty  | 
758  |  |  *                / path-absolute  | 
759  |  |  *                / path-rootless  | 
760  |  |  *                / path-empty  | 
761  |  |  *  | 
762  |  |  * Returns 0 or the error code  | 
763  |  |  */  | 
764  |  | static int  | 
765  |  | xmlParse3986HierPart(xmlURIPtr uri, const char **str)  | 
766  | 1.09M  | { | 
767  | 1.09M  |     const char *cur;  | 
768  | 1.09M  |     int ret;  | 
769  |  |  | 
770  | 1.09M  |     cur = *str;  | 
771  |  |  | 
772  | 1.09M  |     if ((*cur == '/') && (*(cur + 1) == '/')) { | 
773  | 660k  |         cur += 2;  | 
774  | 660k  |   ret = xmlParse3986Authority(uri, &cur);  | 
775  | 660k  |   if (ret != 0) return(ret);  | 
776  |  |         /*  | 
777  |  |          * An empty server is marked with a special URI value.  | 
778  |  |          */  | 
779  | 638k  |   if ((uri->server == NULL) && (uri->port == PORT_EMPTY))  | 
780  | 96.8k  |       uri->port = PORT_EMPTY_SERVER;  | 
781  | 638k  |   ret = xmlParse3986PathAbEmpty(uri, &cur);  | 
782  | 638k  |   if (ret != 0) return(ret);  | 
783  | 638k  |   *str = cur;  | 
784  | 638k  |   return(0);  | 
785  | 638k  |     } else if (*cur == '/') { | 
786  | 29.4k  |         ret = xmlParse3986PathAbsolute(uri, &cur);  | 
787  | 29.4k  |   if (ret != 0) return(ret);  | 
788  | 408k  |     } else if (ISA_PCHAR(cur)) { | 
789  | 371k  |         ret = xmlParse3986PathRootless(uri, &cur);  | 
790  | 371k  |   if (ret != 0) return(ret);  | 
791  | 371k  |     } else { | 
792  |  |   /* path-empty is effectively empty */  | 
793  | 37.4k  |   if (uri != NULL) { | 
794  | 37.4k  |       if (uri->path != NULL) xmlFree(uri->path);  | 
795  | 37.4k  |       uri->path = NULL;  | 
796  | 37.4k  |   }  | 
797  | 37.4k  |     }  | 
798  | 437k  |     *str = cur;  | 
799  | 437k  |     return (0);  | 
800  | 1.09M  | }  | 
801  |  |  | 
802  |  | /**  | 
803  |  |  * xmlParse3986RelativeRef:  | 
804  |  |  * @uri:  pointer to an URI structure  | 
805  |  |  * @str:  the string to analyze  | 
806  |  |  *  | 
807  |  |  * Parse an URI string and fills in the appropriate fields  | 
808  |  |  * of the @uri structure  | 
809  |  |  *  | 
810  |  |  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]  | 
811  |  |  * relative-part = "//" authority path-abempty  | 
812  |  |  *               / path-absolute  | 
813  |  |  *               / path-noscheme  | 
814  |  |  *               / path-empty  | 
815  |  |  *  | 
816  |  |  * Returns 0 or the error code  | 
817  |  |  */  | 
818  |  | static int  | 
819  | 6.41M  | xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) { | 
820  | 6.41M  |     int ret;  | 
821  |  |  | 
822  | 6.41M  |     if ((*str == '/') && (*(str + 1) == '/')) { | 
823  | 18.8k  |         str += 2;  | 
824  | 18.8k  |   ret = xmlParse3986Authority(uri, &str);  | 
825  | 18.8k  |   if (ret != 0) return(ret);  | 
826  | 18.3k  |   ret = xmlParse3986PathAbEmpty(uri, &str);  | 
827  | 18.3k  |   if (ret != 0) return(ret);  | 
828  | 6.39M  |     } else if (*str == '/') { | 
829  | 40.8k  |   ret = xmlParse3986PathAbsolute(uri, &str);  | 
830  | 40.8k  |   if (ret != 0) return(ret);  | 
831  | 6.35M  |     } else if (ISA_PCHAR(str)) { | 
832  | 5.87M  |         ret = xmlParse3986PathNoScheme(uri, &str);  | 
833  | 5.87M  |   if (ret != 0) return(ret);  | 
834  | 5.87M  |     } else { | 
835  |  |   /* path-empty is effectively empty */  | 
836  | 478k  |   if (uri != NULL) { | 
837  | 478k  |       if (uri->path != NULL) xmlFree(uri->path);  | 
838  | 478k  |       uri->path = NULL;  | 
839  | 478k  |   }  | 
840  | 478k  |     }  | 
841  |  |  | 
842  | 6.41M  |     if (*str == '?') { | 
843  | 132k  |   str++;  | 
844  | 132k  |   ret = xmlParse3986Query(uri, &str);  | 
845  | 132k  |   if (ret != 0) return(ret);  | 
846  | 132k  |     }  | 
847  | 6.41M  |     if (*str == '#') { | 
848  | 68.7k  |   str++;  | 
849  | 68.7k  |   ret = xmlParse3986Fragment(uri, &str);  | 
850  | 68.7k  |   if (ret != 0) return(ret);  | 
851  | 68.7k  |     }  | 
852  | 6.41M  |     if (*str != 0) { | 
853  | 1.27M  |   xmlCleanURI(uri);  | 
854  | 1.27M  |   return(1);  | 
855  | 1.27M  |     }  | 
856  | 5.13M  |     return(0);  | 
857  | 6.41M  | }  | 
858  |  |  | 
859  |  |  | 
860  |  | /**  | 
861  |  |  * xmlParse3986URI:  | 
862  |  |  * @uri:  pointer to an URI structure  | 
863  |  |  * @str:  the string to analyze  | 
864  |  |  *  | 
865  |  |  * Parse an URI string and fills in the appropriate fields  | 
866  |  |  * of the @uri structure  | 
867  |  |  *  | 
868  |  |  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]  | 
869  |  |  *  | 
870  |  |  * Returns 0 or the error code  | 
871  |  |  */  | 
872  |  | static int  | 
873  | 7.14M  | xmlParse3986URI(xmlURIPtr uri, const char *str) { | 
874  | 7.14M  |     int ret;  | 
875  |  |  | 
876  | 7.14M  |     ret = xmlParse3986Scheme(uri, &str);  | 
877  | 7.14M  |     if (ret != 0) return(ret);  | 
878  | 6.23M  |     if (*str != ':') { | 
879  | 5.13M  |   return(1);  | 
880  | 5.13M  |     }  | 
881  | 1.09M  |     str++;  | 
882  | 1.09M  |     ret = xmlParse3986HierPart(uri, &str);  | 
883  | 1.09M  |     if (ret != 0) return(ret);  | 
884  | 1.07M  |     if (*str == '?') { | 
885  | 44.6k  |   str++;  | 
886  | 44.6k  |   ret = xmlParse3986Query(uri, &str);  | 
887  | 44.6k  |   if (ret != 0) return(ret);  | 
888  | 44.6k  |     }  | 
889  | 1.07M  |     if (*str == '#') { | 
890  | 57.9k  |   str++;  | 
891  | 57.9k  |   ret = xmlParse3986Fragment(uri, &str);  | 
892  | 57.9k  |   if (ret != 0) return(ret);  | 
893  | 57.9k  |     }  | 
894  | 1.07M  |     if (*str != 0) { | 
895  | 339k  |   xmlCleanURI(uri);  | 
896  | 339k  |   return(1);  | 
897  | 339k  |     }  | 
898  | 737k  |     return(0);  | 
899  | 1.07M  | }  | 
900  |  |  | 
901  |  | /**  | 
902  |  |  * xmlParse3986URIReference:  | 
903  |  |  * @uri:  pointer to an URI structure  | 
904  |  |  * @str:  the string to analyze  | 
905  |  |  *  | 
906  |  |  * Parse an URI reference string and fills in the appropriate fields  | 
907  |  |  * of the @uri structure  | 
908  |  |  *  | 
909  |  |  * URI-reference = URI / relative-ref  | 
910  |  |  *  | 
911  |  |  * Returns 0 or the error code  | 
912  |  |  */  | 
913  |  | static int  | 
914  | 7.14M  | xmlParse3986URIReference(xmlURIPtr uri, const char *str) { | 
915  | 7.14M  |     int ret;  | 
916  |  |  | 
917  | 7.14M  |     if (str == NULL)  | 
918  | 0  |   return(-1);  | 
919  | 7.14M  |     xmlCleanURI(uri);  | 
920  |  |  | 
921  |  |     /*  | 
922  |  |      * Try first to parse absolute refs, then fallback to relative if  | 
923  |  |      * it fails.  | 
924  |  |      */  | 
925  | 7.14M  |     ret = xmlParse3986URI(uri, str);  | 
926  | 7.14M  |     if (ret != 0) { | 
927  | 6.41M  |   xmlCleanURI(uri);  | 
928  | 6.41M  |         ret = xmlParse3986RelativeRef(uri, str);  | 
929  | 6.41M  |   if (ret != 0) { | 
930  | 1.27M  |       xmlCleanURI(uri);  | 
931  | 1.27M  |       return(ret);  | 
932  | 1.27M  |   }  | 
933  | 6.41M  |     }  | 
934  | 5.87M  |     return(0);  | 
935  | 7.14M  | }  | 
936  |  |  | 
937  |  | /**  | 
938  |  |  * xmlParseURI:  | 
939  |  |  * @str:  the URI string to analyze  | 
940  |  |  *  | 
941  |  |  * Parse an URI based on RFC 3986  | 
942  |  |  *  | 
943  |  |  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]  | 
944  |  |  *  | 
945  |  |  * Returns a newly built xmlURIPtr or NULL in case of error  | 
946  |  |  */  | 
947  |  | xmlURIPtr  | 
948  | 5.54M  | xmlParseURI(const char *str) { | 
949  | 5.54M  |     xmlURIPtr uri;  | 
950  | 5.54M  |     int ret;  | 
951  |  |  | 
952  | 5.54M  |     if (str == NULL)  | 
953  | 0  |   return(NULL);  | 
954  | 5.54M  |     uri = xmlCreateURI();  | 
955  | 5.54M  |     if (uri != NULL) { | 
956  | 5.54M  |   ret = xmlParse3986URIReference(uri, str);  | 
957  | 5.54M  |         if (ret) { | 
958  | 1.10M  |       xmlFreeURI(uri);  | 
959  | 1.10M  |       return(NULL);  | 
960  | 1.10M  |   }  | 
961  | 5.54M  |     }  | 
962  | 4.43M  |     return(uri);  | 
963  | 5.54M  | }  | 
964  |  |  | 
965  |  | /**  | 
966  |  |  * xmlParseURIReference:  | 
967  |  |  * @uri:  pointer to an URI structure  | 
968  |  |  * @str:  the string to analyze  | 
969  |  |  *  | 
970  |  |  * Parse an URI reference string based on RFC 3986 and fills in the  | 
971  |  |  * appropriate fields of the @uri structure  | 
972  |  |  *  | 
973  |  |  * URI-reference = URI / relative-ref  | 
974  |  |  *  | 
975  |  |  * Returns 0 or the error code  | 
976  |  |  */  | 
977  |  | int  | 
978  | 1.60M  | xmlParseURIReference(xmlURIPtr uri, const char *str) { | 
979  | 1.60M  |     return(xmlParse3986URIReference(uri, str));  | 
980  | 1.60M  | }  | 
981  |  |  | 
982  |  | /**  | 
983  |  |  * xmlParseURIRaw:  | 
984  |  |  * @str:  the URI string to analyze  | 
985  |  |  * @raw:  if 1 unescaping of URI pieces are disabled  | 
986  |  |  *  | 
987  |  |  * Parse an URI but allows to keep intact the original fragments.  | 
988  |  |  *  | 
989  |  |  * URI-reference = URI / relative-ref  | 
990  |  |  *  | 
991  |  |  * Returns a newly built xmlURIPtr or NULL in case of error  | 
992  |  |  */  | 
993  |  | xmlURIPtr  | 
994  | 0  | xmlParseURIRaw(const char *str, int raw) { | 
995  | 0  |     xmlURIPtr uri;  | 
996  | 0  |     int ret;  | 
997  |  | 
  | 
998  | 0  |     if (str == NULL)  | 
999  | 0  |   return(NULL);  | 
1000  | 0  |     uri = xmlCreateURI();  | 
1001  | 0  |     if (uri != NULL) { | 
1002  | 0  |         if (raw) { | 
1003  | 0  |       uri->cleanup |= 2;  | 
1004  | 0  |   }  | 
1005  | 0  |   ret = xmlParseURIReference(uri, str);  | 
1006  | 0  |         if (ret) { | 
1007  | 0  |       xmlFreeURI(uri);  | 
1008  | 0  |       return(NULL);  | 
1009  | 0  |   }  | 
1010  | 0  |     }  | 
1011  | 0  |     return(uri);  | 
1012  | 0  | }  | 
1013  |  |  | 
1014  |  | /************************************************************************  | 
1015  |  |  *                  *  | 
1016  |  |  *      Generic URI structure functions     *  | 
1017  |  |  *                  *  | 
1018  |  |  ************************************************************************/  | 
1019  |  |  | 
1020  |  | /**  | 
1021  |  |  * xmlCreateURI:  | 
1022  |  |  *  | 
1023  |  |  * Simply creates an empty xmlURI  | 
1024  |  |  *  | 
1025  |  |  * Returns the new structure or NULL in case of error  | 
1026  |  |  */  | 
1027  |  | xmlURIPtr  | 
1028  | 7.29M  | xmlCreateURI(void) { | 
1029  | 7.29M  |     xmlURIPtr ret;  | 
1030  |  |  | 
1031  | 7.29M  |     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));  | 
1032  | 7.29M  |     if (ret == NULL) { | 
1033  | 0  |         xmlURIErrMemory("creating URI structure\n"); | 
1034  | 0  |   return(NULL);  | 
1035  | 0  |     }  | 
1036  | 7.29M  |     memset(ret, 0, sizeof(xmlURI));  | 
1037  | 7.29M  |     ret->port = PORT_EMPTY;  | 
1038  | 7.29M  |     return(ret);  | 
1039  | 7.29M  | }  | 
1040  |  |  | 
1041  |  | /**  | 
1042  |  |  * xmlSaveUriRealloc:  | 
1043  |  |  *  | 
1044  |  |  * Function to handle properly a reallocation when saving an URI  | 
1045  |  |  * Also imposes some limit on the length of an URI string output  | 
1046  |  |  */  | 
1047  |  | static xmlChar *  | 
1048  | 151k  | xmlSaveUriRealloc(xmlChar *ret, int *max) { | 
1049  | 151k  |     xmlChar *temp;  | 
1050  | 151k  |     int tmp;  | 
1051  |  |  | 
1052  | 151k  |     if (*max > MAX_URI_LENGTH) { | 
1053  | 0  |         xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n"); | 
1054  | 0  |         return(NULL);  | 
1055  | 0  |     }  | 
1056  | 151k  |     tmp = *max * 2;  | 
1057  | 151k  |     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));  | 
1058  | 151k  |     if (temp == NULL) { | 
1059  | 0  |         xmlURIErrMemory("saving URI\n"); | 
1060  | 0  |         return(NULL);  | 
1061  | 0  |     }  | 
1062  | 151k  |     *max = tmp;  | 
1063  | 151k  |     return(temp);  | 
1064  | 151k  | }  | 
1065  |  |  | 
1066  |  | /**  | 
1067  |  |  * xmlSaveUri:  | 
1068  |  |  * @uri:  pointer to an xmlURI  | 
1069  |  |  *  | 
1070  |  |  * Save the URI as an escaped string  | 
1071  |  |  *  | 
1072  |  |  * Returns a new string (to be deallocated by caller)  | 
1073  |  |  */  | 
1074  |  | xmlChar *  | 
1075  | 1.57M  | xmlSaveUri(xmlURIPtr uri) { | 
1076  | 1.57M  |     xmlChar *ret = NULL;  | 
1077  | 1.57M  |     xmlChar *temp;  | 
1078  | 1.57M  |     const char *p;  | 
1079  | 1.57M  |     int len;  | 
1080  | 1.57M  |     int max;  | 
1081  |  |  | 
1082  | 1.57M  |     if (uri == NULL) return(NULL);  | 
1083  |  |  | 
1084  |  |  | 
1085  | 1.57M  |     max = 80;  | 
1086  | 1.57M  |     ret = (xmlChar *) xmlMallocAtomic(max + 1);  | 
1087  | 1.57M  |     if (ret == NULL) { | 
1088  | 0  |         xmlURIErrMemory("saving URI\n"); | 
1089  | 0  |   return(NULL);  | 
1090  | 0  |     }  | 
1091  | 1.57M  |     len = 0;  | 
1092  |  |  | 
1093  | 1.57M  |     if (uri->scheme != NULL) { | 
1094  | 65.8k  |   p = uri->scheme;  | 
1095  | 1.38M  |   while (*p != 0) { | 
1096  | 1.31M  |       if (len >= max) { | 
1097  | 1.36k  |                 temp = xmlSaveUriRealloc(ret, &max);  | 
1098  | 1.36k  |                 if (temp == NULL) goto mem_error;  | 
1099  | 1.36k  |     ret = temp;  | 
1100  | 1.36k  |       }  | 
1101  | 1.31M  |       ret[len++] = *p++;  | 
1102  | 1.31M  |   }  | 
1103  | 65.8k  |   if (len >= max) { | 
1104  | 284  |             temp = xmlSaveUriRealloc(ret, &max);  | 
1105  | 284  |             if (temp == NULL) goto mem_error;  | 
1106  | 284  |             ret = temp;  | 
1107  | 284  |   }  | 
1108  | 65.8k  |   ret[len++] = ':';  | 
1109  | 65.8k  |     }  | 
1110  | 1.57M  |     if (uri->opaque != NULL) { | 
1111  | 0  |   p = uri->opaque;  | 
1112  | 0  |   while (*p != 0) { | 
1113  | 0  |       if (len + 3 >= max) { | 
1114  | 0  |                 temp = xmlSaveUriRealloc(ret, &max);  | 
1115  | 0  |                 if (temp == NULL) goto mem_error;  | 
1116  | 0  |                 ret = temp;  | 
1117  | 0  |       }  | 
1118  | 0  |       if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))  | 
1119  | 0  |     ret[len++] = *p++;  | 
1120  | 0  |       else { | 
1121  | 0  |     int val = *(unsigned char *)p++;  | 
1122  | 0  |     int hi = val / 0x10, lo = val % 0x10;  | 
1123  | 0  |     ret[len++] = '%';  | 
1124  | 0  |     ret[len++] = hi + (hi > 9? 'A'-10 : '0');  | 
1125  | 0  |     ret[len++] = lo + (lo > 9? 'A'-10 : '0');  | 
1126  | 0  |       }  | 
1127  | 0  |   }  | 
1128  | 1.57M  |     } else { | 
1129  | 1.57M  |   if ((uri->server != NULL) || (uri->port != PORT_EMPTY)) { | 
1130  | 47.1k  |       if (len + 3 >= max) { | 
1131  | 192  |                 temp = xmlSaveUriRealloc(ret, &max);  | 
1132  | 192  |                 if (temp == NULL) goto mem_error;  | 
1133  | 192  |                 ret = temp;  | 
1134  | 192  |       }  | 
1135  | 47.1k  |       ret[len++] = '/';  | 
1136  | 47.1k  |       ret[len++] = '/';  | 
1137  | 47.1k  |       if (uri->user != NULL) { | 
1138  | 10.5k  |     p = uri->user;  | 
1139  | 1.56M  |     while (*p != 0) { | 
1140  | 1.55M  |         if (len + 3 >= max) { | 
1141  | 4.87k  |                         temp = xmlSaveUriRealloc(ret, &max);  | 
1142  | 4.87k  |                         if (temp == NULL) goto mem_error;  | 
1143  | 4.87k  |                         ret = temp;  | 
1144  | 4.87k  |         }  | 
1145  | 1.55M  |         if ((IS_UNRESERVED(*(p))) ||  | 
1146  | 1.55M  |       ((*(p) == ';')) || ((*(p) == ':')) ||  | 
1147  | 1.55M  |       ((*(p) == '&')) || ((*(p) == '=')) ||  | 
1148  | 1.55M  |       ((*(p) == '+')) || ((*(p) == '$')) ||  | 
1149  | 1.55M  |       ((*(p) == ',')))  | 
1150  | 1.05M  |       ret[len++] = *p++;  | 
1151  | 501k  |         else { | 
1152  | 501k  |       int val = *(unsigned char *)p++;  | 
1153  | 501k  |       int hi = val / 0x10, lo = val % 0x10;  | 
1154  | 501k  |       ret[len++] = '%';  | 
1155  | 501k  |       ret[len++] = hi + (hi > 9? 'A'-10 : '0');  | 
1156  | 501k  |       ret[len++] = lo + (lo > 9? 'A'-10 : '0');  | 
1157  | 501k  |         }  | 
1158  | 1.55M  |     }  | 
1159  | 10.5k  |     if (len + 3 >= max) { | 
1160  | 423  |                     temp = xmlSaveUriRealloc(ret, &max);  | 
1161  | 423  |                     if (temp == NULL) goto mem_error;  | 
1162  | 423  |                     ret = temp;  | 
1163  | 423  |     }  | 
1164  | 10.5k  |     ret[len++] = '@';  | 
1165  | 10.5k  |       }  | 
1166  | 47.1k  |       if (uri->server != NULL) { | 
1167  | 36.7k  |     p = uri->server;  | 
1168  | 2.00M  |     while (*p != 0) { | 
1169  | 1.96M  |         if (len >= max) { | 
1170  | 2.12k  |       temp = xmlSaveUriRealloc(ret, &max);  | 
1171  | 2.12k  |       if (temp == NULL) goto mem_error;  | 
1172  | 2.12k  |       ret = temp;  | 
1173  | 2.12k  |         }  | 
1174  |  |                     /* TODO: escaping? */  | 
1175  | 1.96M  |         ret[len++] = (xmlChar) *p++;  | 
1176  | 1.96M  |     }  | 
1177  | 36.7k  |       }  | 
1178  | 47.1k  |             if (uri->port > 0) { | 
1179  | 1.06k  |                 if (len + 10 >= max) { | 
1180  | 226  |                     temp = xmlSaveUriRealloc(ret, &max);  | 
1181  | 226  |                     if (temp == NULL) goto mem_error;  | 
1182  | 226  |                     ret = temp;  | 
1183  | 226  |                 }  | 
1184  | 1.06k  |                 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);  | 
1185  | 1.06k  |             }  | 
1186  | 1.52M  |   } else if (uri->authority != NULL) { | 
1187  | 0  |       if (len + 3 >= max) { | 
1188  | 0  |                 temp = xmlSaveUriRealloc(ret, &max);  | 
1189  | 0  |                 if (temp == NULL) goto mem_error;  | 
1190  | 0  |                 ret = temp;  | 
1191  | 0  |       }  | 
1192  | 0  |       ret[len++] = '/';  | 
1193  | 0  |       ret[len++] = '/';  | 
1194  | 0  |       p = uri->authority;  | 
1195  | 0  |       while (*p != 0) { | 
1196  | 0  |     if (len + 3 >= max) { | 
1197  | 0  |                     temp = xmlSaveUriRealloc(ret, &max);  | 
1198  | 0  |                     if (temp == NULL) goto mem_error;  | 
1199  | 0  |                     ret = temp;  | 
1200  | 0  |     }  | 
1201  | 0  |     if ((IS_UNRESERVED(*(p))) ||  | 
1202  | 0  |                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||  | 
1203  | 0  |                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||  | 
1204  | 0  |                     ((*(p) == '=')) || ((*(p) == '+')))  | 
1205  | 0  |         ret[len++] = *p++;  | 
1206  | 0  |     else { | 
1207  | 0  |         int val = *(unsigned char *)p++;  | 
1208  | 0  |         int hi = val / 0x10, lo = val % 0x10;  | 
1209  | 0  |         ret[len++] = '%';  | 
1210  | 0  |         ret[len++] = hi + (hi > 9? 'A'-10 : '0');  | 
1211  | 0  |         ret[len++] = lo + (lo > 9? 'A'-10 : '0');  | 
1212  | 0  |     }  | 
1213  | 0  |       }  | 
1214  | 1.52M  |   } else if (uri->scheme != NULL) { | 
1215  | 22.6k  |       if (len + 3 >= max) { | 
1216  | 235  |                 temp = xmlSaveUriRealloc(ret, &max);  | 
1217  | 235  |                 if (temp == NULL) goto mem_error;  | 
1218  | 235  |                 ret = temp;  | 
1219  | 235  |       }  | 
1220  | 22.6k  |   }  | 
1221  | 1.57M  |   if (uri->path != NULL) { | 
1222  | 1.55M  |       p = uri->path;  | 
1223  |  |       /*  | 
1224  |  |        * the colon in file:///d: should not be escaped or  | 
1225  |  |        * Windows accesses fail later.  | 
1226  |  |        */  | 
1227  | 1.55M  |       if ((uri->scheme != NULL) &&  | 
1228  | 1.55M  |     (p[0] == '/') &&  | 
1229  | 1.55M  |     (((p[1] >= 'a') && (p[1] <= 'z')) ||  | 
1230  | 45.8k  |      ((p[1] >= 'A') && (p[1] <= 'Z'))) &&  | 
1231  | 1.55M  |     (p[2] == ':') &&  | 
1232  | 1.55M  |           (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) { | 
1233  | 16  |     if (len + 3 >= max) { | 
1234  | 0  |                     temp = xmlSaveUriRealloc(ret, &max);  | 
1235  | 0  |                     if (temp == NULL) goto mem_error;  | 
1236  | 0  |                     ret = temp;  | 
1237  | 0  |     }  | 
1238  | 16  |     ret[len++] = *p++;  | 
1239  | 16  |     ret[len++] = *p++;  | 
1240  | 16  |     ret[len++] = *p++;  | 
1241  | 16  |       }  | 
1242  | 68.4M  |       while (*p != 0) { | 
1243  | 66.8M  |     if (len + 3 >= max) { | 
1244  | 90.7k  |                     temp = xmlSaveUriRealloc(ret, &max);  | 
1245  | 90.7k  |                     if (temp == NULL) goto mem_error;  | 
1246  | 90.7k  |                     ret = temp;  | 
1247  | 90.7k  |     }  | 
1248  | 66.8M  |     if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||  | 
1249  | 66.8M  |                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||  | 
1250  | 66.8M  |               ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||  | 
1251  | 66.8M  |               ((*(p) == ',')))  | 
1252  | 61.7M  |         ret[len++] = *p++;  | 
1253  | 5.16M  |     else { | 
1254  | 5.16M  |         int val = *(unsigned char *)p++;  | 
1255  | 5.16M  |         int hi = val / 0x10, lo = val % 0x10;  | 
1256  | 5.16M  |         ret[len++] = '%';  | 
1257  | 5.16M  |         ret[len++] = hi + (hi > 9? 'A'-10 : '0');  | 
1258  | 5.16M  |         ret[len++] = lo + (lo > 9? 'A'-10 : '0');  | 
1259  | 5.16M  |     }  | 
1260  | 66.8M  |       }  | 
1261  | 1.55M  |   }  | 
1262  | 1.57M  |   if (uri->query_raw != NULL) { | 
1263  | 41.3k  |       if (len + 1 >= max) { | 
1264  | 63  |                 temp = xmlSaveUriRealloc(ret, &max);  | 
1265  | 63  |                 if (temp == NULL) goto mem_error;  | 
1266  | 63  |                 ret = temp;  | 
1267  | 63  |       }  | 
1268  | 41.3k  |       ret[len++] = '?';  | 
1269  | 41.3k  |       p = uri->query_raw;  | 
1270  | 3.16M  |       while (*p != 0) { | 
1271  | 3.12M  |     if (len + 1 >= max) { | 
1272  | 6.68k  |                     temp = xmlSaveUriRealloc(ret, &max);  | 
1273  | 6.68k  |                     if (temp == NULL) goto mem_error;  | 
1274  | 6.68k  |                     ret = temp;  | 
1275  | 6.68k  |     }  | 
1276  | 3.12M  |     ret[len++] = *p++;  | 
1277  | 3.12M  |       }  | 
1278  | 1.53M  |   } else if (uri->query != NULL) { | 
1279  | 0  |       if (len + 3 >= max) { | 
1280  | 0  |                 temp = xmlSaveUriRealloc(ret, &max);  | 
1281  | 0  |                 if (temp == NULL) goto mem_error;  | 
1282  | 0  |                 ret = temp;  | 
1283  | 0  |       }  | 
1284  | 0  |       ret[len++] = '?';  | 
1285  | 0  |       p = uri->query;  | 
1286  | 0  |       while (*p != 0) { | 
1287  | 0  |     if (len + 3 >= max) { | 
1288  | 0  |                     temp = xmlSaveUriRealloc(ret, &max);  | 
1289  | 0  |                     if (temp == NULL) goto mem_error;  | 
1290  | 0  |                     ret = temp;  | 
1291  | 0  |     }  | 
1292  | 0  |     if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))  | 
1293  | 0  |         ret[len++] = *p++;  | 
1294  | 0  |     else { | 
1295  | 0  |         int val = *(unsigned char *)p++;  | 
1296  | 0  |         int hi = val / 0x10, lo = val % 0x10;  | 
1297  | 0  |         ret[len++] = '%';  | 
1298  | 0  |         ret[len++] = hi + (hi > 9? 'A'-10 : '0');  | 
1299  | 0  |         ret[len++] = lo + (lo > 9? 'A'-10 : '0');  | 
1300  | 0  |     }  | 
1301  | 0  |       }  | 
1302  | 0  |   }  | 
1303  | 1.57M  |     }  | 
1304  | 1.57M  |     if (uri->fragment != NULL) { | 
1305  | 14.3k  |   if (len + 3 >= max) { | 
1306  | 157  |             temp = xmlSaveUriRealloc(ret, &max);  | 
1307  | 157  |             if (temp == NULL) goto mem_error;  | 
1308  | 157  |             ret = temp;  | 
1309  | 157  |   }  | 
1310  | 14.3k  |   ret[len++] = '#';  | 
1311  | 14.3k  |   p = uri->fragment;  | 
1312  | 2.82M  |   while (*p != 0) { | 
1313  | 2.80M  |       if (len + 3 >= max) { | 
1314  | 3.31k  |                 temp = xmlSaveUriRealloc(ret, &max);  | 
1315  | 3.31k  |                 if (temp == NULL) goto mem_error;  | 
1316  | 3.31k  |                 ret = temp;  | 
1317  | 3.31k  |       }  | 
1318  | 2.80M  |       if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))  | 
1319  | 2.80M  |     ret[len++] = *p++;  | 
1320  | 1.91k  |       else { | 
1321  | 1.91k  |     int val = *(unsigned char *)p++;  | 
1322  | 1.91k  |     int hi = val / 0x10, lo = val % 0x10;  | 
1323  | 1.91k  |     ret[len++] = '%';  | 
1324  | 1.91k  |     ret[len++] = hi + (hi > 9? 'A'-10 : '0');  | 
1325  | 1.91k  |     ret[len++] = lo + (lo > 9? 'A'-10 : '0');  | 
1326  | 1.91k  |       }  | 
1327  | 2.80M  |   }  | 
1328  | 14.3k  |     }  | 
1329  | 1.57M  |     if (len >= max) { | 
1330  | 50  |         temp = xmlSaveUriRealloc(ret, &max);  | 
1331  | 50  |         if (temp == NULL) goto mem_error;  | 
1332  | 50  |         ret = temp;  | 
1333  | 50  |     }  | 
1334  | 1.57M  |     ret[len] = 0;  | 
1335  | 1.57M  |     return(ret);  | 
1336  |  |  | 
1337  | 0  | mem_error:  | 
1338  | 0  |     xmlFree(ret);  | 
1339  | 0  |     return(NULL);  | 
1340  | 1.57M  | }  | 
1341  |  |  | 
1342  |  | /**  | 
1343  |  |  * xmlPrintURI:  | 
1344  |  |  * @stream:  a FILE* for the output  | 
1345  |  |  * @uri:  pointer to an xmlURI  | 
1346  |  |  *  | 
1347  |  |  * Prints the URI in the stream @stream.  | 
1348  |  |  */  | 
1349  |  | void  | 
1350  | 0  | xmlPrintURI(FILE *stream, xmlURIPtr uri) { | 
1351  | 0  |     xmlChar *out;  | 
1352  |  | 
  | 
1353  | 0  |     out = xmlSaveUri(uri);  | 
1354  | 0  |     if (out != NULL) { | 
1355  | 0  |   fprintf(stream, "%s", (char *) out);  | 
1356  | 0  |   xmlFree(out);  | 
1357  | 0  |     }  | 
1358  | 0  | }  | 
1359  |  |  | 
1360  |  | /**  | 
1361  |  |  * xmlCleanURI:  | 
1362  |  |  * @uri:  pointer to an xmlURI  | 
1363  |  |  *  | 
1364  |  |  * Make sure the xmlURI struct is free of content  | 
1365  |  |  */  | 
1366  |  | static void  | 
1367  | 16.4M  | xmlCleanURI(xmlURIPtr uri) { | 
1368  | 16.4M  |     if (uri == NULL) return;  | 
1369  |  |  | 
1370  | 16.4M  |     if (uri->scheme != NULL) xmlFree(uri->scheme);  | 
1371  | 16.4M  |     uri->scheme = NULL;  | 
1372  | 16.4M  |     if (uri->server != NULL) xmlFree(uri->server);  | 
1373  | 16.4M  |     uri->server = NULL;  | 
1374  | 16.4M  |     if (uri->user != NULL) xmlFree(uri->user);  | 
1375  | 16.4M  |     uri->user = NULL;  | 
1376  | 16.4M  |     if (uri->path != NULL) xmlFree(uri->path);  | 
1377  | 16.4M  |     uri->path = NULL;  | 
1378  | 16.4M  |     if (uri->fragment != NULL) xmlFree(uri->fragment);  | 
1379  | 16.4M  |     uri->fragment = NULL;  | 
1380  | 16.4M  |     if (uri->opaque != NULL) xmlFree(uri->opaque);  | 
1381  | 16.4M  |     uri->opaque = NULL;  | 
1382  | 16.4M  |     if (uri->authority != NULL) xmlFree(uri->authority);  | 
1383  | 16.4M  |     uri->authority = NULL;  | 
1384  | 16.4M  |     if (uri->query != NULL) xmlFree(uri->query);  | 
1385  | 16.4M  |     uri->query = NULL;  | 
1386  | 16.4M  |     if (uri->query_raw != NULL) xmlFree(uri->query_raw);  | 
1387  | 16.4M  |     uri->query_raw = NULL;  | 
1388  | 16.4M  | }  | 
1389  |  |  | 
1390  |  | /**  | 
1391  |  |  * xmlFreeURI:  | 
1392  |  |  * @uri:  pointer to an xmlURI  | 
1393  |  |  *  | 
1394  |  |  * Free up the xmlURI struct  | 
1395  |  |  */  | 
1396  |  | void  | 
1397  | 7.29M  | xmlFreeURI(xmlURIPtr uri) { | 
1398  | 7.29M  |     if (uri == NULL) return;  | 
1399  |  |  | 
1400  | 7.29M  |     if (uri->scheme != NULL) xmlFree(uri->scheme);  | 
1401  | 7.29M  |     if (uri->server != NULL) xmlFree(uri->server);  | 
1402  | 7.29M  |     if (uri->user != NULL) xmlFree(uri->user);  | 
1403  | 7.29M  |     if (uri->path != NULL) xmlFree(uri->path);  | 
1404  | 7.29M  |     if (uri->fragment != NULL) xmlFree(uri->fragment);  | 
1405  | 7.29M  |     if (uri->opaque != NULL) xmlFree(uri->opaque);  | 
1406  | 7.29M  |     if (uri->authority != NULL) xmlFree(uri->authority);  | 
1407  | 7.29M  |     if (uri->query != NULL) xmlFree(uri->query);  | 
1408  | 7.29M  |     if (uri->query_raw != NULL) xmlFree(uri->query_raw);  | 
1409  | 7.29M  |     xmlFree(uri);  | 
1410  | 7.29M  | }  | 
1411  |  |  | 
1412  |  | /************************************************************************  | 
1413  |  |  *                  *  | 
1414  |  |  *      Helper functions        *  | 
1415  |  |  *                  *  | 
1416  |  |  ************************************************************************/  | 
1417  |  |  | 
1418  |  | /**  | 
1419  |  |  * xmlNormalizeURIPath:  | 
1420  |  |  * @path:  pointer to the path string  | 
1421  |  |  *  | 
1422  |  |  * Applies the 5 normalization steps to a path string--that is, RFC 2396  | 
1423  |  |  * Section 5.2, steps 6.c through 6.g.  | 
1424  |  |  *  | 
1425  |  |  * Normalization occurs directly on the string, no new allocation is done  | 
1426  |  |  *  | 
1427  |  |  * Returns 0 or an error code  | 
1428  |  |  */  | 
1429  |  | int  | 
1430  | 138k  | xmlNormalizeURIPath(char *path) { | 
1431  | 138k  |     char *cur, *out;  | 
1432  |  |  | 
1433  | 138k  |     if (path == NULL)  | 
1434  | 0  |   return(-1);  | 
1435  |  |  | 
1436  |  |     /* Skip all initial "/" chars.  We want to get to the beginning of the  | 
1437  |  |      * first non-empty segment.  | 
1438  |  |      */  | 
1439  | 138k  |     cur = path;  | 
1440  | 213k  |     while (cur[0] == '/')  | 
1441  | 74.2k  |       ++cur;  | 
1442  | 138k  |     if (cur[0] == '\0')  | 
1443  | 5  |       return(0);  | 
1444  |  |  | 
1445  |  |     /* Keep everything we've seen so far.  */  | 
1446  | 138k  |     out = cur;  | 
1447  |  |  | 
1448  |  |     /*  | 
1449  |  |      * Analyze each segment in sequence for cases (c) and (d).  | 
1450  |  |      */  | 
1451  | 280k  |     while (cur[0] != '\0') { | 
1452  |  |   /*  | 
1453  |  |    * c) All occurrences of "./", where "." is a complete path segment,  | 
1454  |  |    *    are removed from the buffer string.  | 
1455  |  |    */  | 
1456  | 280k  |   if ((cur[0] == '.') && (cur[1] == '/')) { | 
1457  | 1.94k  |       cur += 2;  | 
1458  |  |       /* '//' normalization should be done at this point too */  | 
1459  | 12.7k  |       while (cur[0] == '/')  | 
1460  | 10.8k  |     cur++;  | 
1461  | 1.94k  |       continue;  | 
1462  | 1.94k  |   }  | 
1463  |  |  | 
1464  |  |   /*  | 
1465  |  |    * d) If the buffer string ends with "." as a complete path segment,  | 
1466  |  |    *    that "." is removed.  | 
1467  |  |    */  | 
1468  | 278k  |   if ((cur[0] == '.') && (cur[1] == '\0'))  | 
1469  | 269  |       break;  | 
1470  |  |  | 
1471  |  |   /* Otherwise keep the segment.  */  | 
1472  | 2.67M  |   while (cur[0] != '/') { | 
1473  | 2.53M  |             if (cur[0] == '\0')  | 
1474  | 138k  |               goto done_cd;  | 
1475  | 2.39M  |       (out++)[0] = (cur++)[0];  | 
1476  | 2.39M  |   }  | 
1477  |  |   /* normalize // */  | 
1478  | 194k  |   while ((cur[0] == '/') && (cur[1] == '/'))  | 
1479  | 54.7k  |       cur++;  | 
1480  |  |  | 
1481  | 139k  |         (out++)[0] = (cur++)[0];  | 
1482  | 139k  |     }  | 
1483  | 138k  |  done_cd:  | 
1484  | 138k  |     out[0] = '\0';  | 
1485  |  |  | 
1486  |  |     /* Reset to the beginning of the first segment for the next sequence.  */  | 
1487  | 138k  |     cur = path;  | 
1488  | 213k  |     while (cur[0] == '/')  | 
1489  | 74.2k  |       ++cur;  | 
1490  | 138k  |     if (cur[0] == '\0')  | 
1491  | 192  |   return(0);  | 
1492  |  |  | 
1493  |  |     /*  | 
1494  |  |      * Analyze each segment in sequence for cases (e) and (f).  | 
1495  |  |      *  | 
1496  |  |      * e) All occurrences of "<segment>/../", where <segment> is a  | 
1497  |  |      *    complete path segment not equal to "..", are removed from the  | 
1498  |  |      *    buffer string.  Removal of these path segments is performed  | 
1499  |  |      *    iteratively, removing the leftmost matching pattern on each  | 
1500  |  |      *    iteration, until no matching pattern remains.  | 
1501  |  |      *  | 
1502  |  |      * f) If the buffer string ends with "<segment>/..", where <segment>  | 
1503  |  |      *    is a complete path segment not equal to "..", that  | 
1504  |  |      *    "<segment>/.." is removed.  | 
1505  |  |      *  | 
1506  |  |      * To satisfy the "iterative" clause in (e), we need to collapse the  | 
1507  |  |      * string every time we find something that needs to be removed.  Thus,  | 
1508  |  |      * we don't need to keep two pointers into the string: we only need a  | 
1509  |  |      * "current position" pointer.  | 
1510  |  |      */  | 
1511  | 276k  |     while (1) { | 
1512  | 276k  |         char *segp, *tmp;  | 
1513  |  |  | 
1514  |  |         /* At the beginning of each iteration of this loop, "cur" points to  | 
1515  |  |          * the first character of the segment we want to examine.  | 
1516  |  |          */  | 
1517  |  |  | 
1518  |  |         /* Find the end of the current segment.  */  | 
1519  | 276k  |         segp = cur;  | 
1520  | 2.68M  |         while ((segp[0] != '/') && (segp[0] != '\0'))  | 
1521  | 2.40M  |           ++segp;  | 
1522  |  |  | 
1523  |  |         /* If this is the last segment, we're done (we need at least two  | 
1524  |  |          * segments to meet the criteria for the (e) and (f) cases).  | 
1525  |  |          */  | 
1526  | 276k  |         if (segp[0] == '\0')  | 
1527  | 138k  |           break;  | 
1528  |  |  | 
1529  |  |         /* If the first segment is "..", or if the next segment _isn't_ "..",  | 
1530  |  |          * keep this segment and try the next one.  | 
1531  |  |          */  | 
1532  | 138k  |         ++segp;  | 
1533  | 138k  |         if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))  | 
1534  | 138k  |             || ((segp[0] != '.') || (segp[1] != '.')  | 
1535  | 135k  |                 || ((segp[2] != '/') && (segp[2] != '\0')))) { | 
1536  | 133k  |           cur = segp;  | 
1537  | 133k  |           continue;  | 
1538  | 133k  |         }  | 
1539  |  |  | 
1540  |  |         /* If we get here, remove this segment and the next one and back up  | 
1541  |  |          * to the previous segment (if there is one), to implement the  | 
1542  |  |          * "iteratively" clause.  It's pretty much impossible to back up  | 
1543  |  |          * while maintaining two pointers into the buffer, so just compact  | 
1544  |  |          * the whole buffer now.  | 
1545  |  |          */  | 
1546  |  |  | 
1547  |  |         /* If this is the end of the buffer, we're done.  */  | 
1548  | 4.07k  |         if (segp[2] == '\0') { | 
1549  | 172  |           cur[0] = '\0';  | 
1550  | 172  |           break;  | 
1551  | 172  |         }  | 
1552  |  |         /* Valgrind complained, strcpy(cur, segp + 3); */  | 
1553  |  |         /* string will overlap, do not use strcpy */  | 
1554  | 3.90k  |         tmp = cur;  | 
1555  | 3.90k  |         segp += 3;  | 
1556  | 129k  |         while ((*tmp++ = *segp++) != 0)  | 
1557  | 125k  |           ;  | 
1558  |  |  | 
1559  |  |         /* If there are no previous segments, then keep going from here.  */  | 
1560  | 3.90k  |         segp = cur;  | 
1561  | 13.0k  |         while ((segp > path) && ((--segp)[0] == '/'))  | 
1562  | 9.18k  |           ;  | 
1563  | 3.90k  |         if (segp == path)  | 
1564  | 1.51k  |           continue;  | 
1565  |  |  | 
1566  |  |         /* "segp" is pointing to the end of a previous segment; find its  | 
1567  |  |          * start.  We need to back up to the previous segment and start  | 
1568  |  |          * over with that to handle things like "foo/bar/../..".  If we  | 
1569  |  |          * don't do this, then on the first pass we'll remove the "bar/..",  | 
1570  |  |          * but be pointing at the second ".." so we won't realize we can also  | 
1571  |  |          * remove the "foo/..".  | 
1572  |  |          */  | 
1573  | 2.38k  |         cur = segp;  | 
1574  | 18.4k  |         while ((cur > path) && (cur[-1] != '/'))  | 
1575  | 16.0k  |           --cur;  | 
1576  | 2.38k  |     }  | 
1577  | 138k  |     out[0] = '\0';  | 
1578  |  |  | 
1579  |  |     /*  | 
1580  |  |      * g) If the resulting buffer string still begins with one or more  | 
1581  |  |      *    complete path segments of "..", then the reference is  | 
1582  |  |      *    considered to be in error. Implementations may handle this  | 
1583  |  |      *    error by retaining these components in the resolved path (i.e.,  | 
1584  |  |      *    treating them as part of the final URI), by removing them from  | 
1585  |  |      *    the resolved path (i.e., discarding relative levels above the  | 
1586  |  |      *    root), or by avoiding traversal of the reference.  | 
1587  |  |      *  | 
1588  |  |      * We discard them from the final path.  | 
1589  |  |      */  | 
1590  | 138k  |     if (path[0] == '/') { | 
1591  | 50.9k  |       cur = path;  | 
1592  | 52.4k  |       while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')  | 
1593  | 52.4k  |              && ((cur[3] == '/') || (cur[3] == '\0')))  | 
1594  | 1.47k  |   cur += 3;  | 
1595  |  |  | 
1596  | 50.9k  |       if (cur != path) { | 
1597  | 1.26k  |   out = path;  | 
1598  | 99.5k  |   while (cur[0] != '\0')  | 
1599  | 98.2k  |           (out++)[0] = (cur++)[0];  | 
1600  | 1.26k  |   out[0] = 0;  | 
1601  | 1.26k  |       }  | 
1602  | 50.9k  |     }  | 
1603  |  |  | 
1604  | 138k  |     return(0);  | 
1605  | 138k  | }  | 
1606  |  |  | 
/*
 * is_hex:
 * @c:  the character to classify
 *
 * Tells whether @c is a hexadecimal digit, i.e. one of 0-9, a-f or A-F.
 *
 * Returns 1 if @c is a hexadecimal digit, 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1614  |  |  | 
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the number of bytes of @str to process, or <= 0 to process the
 *        whole NUL-terminated string
 * @target:  optional destination buffer; when NULL a new buffer is
 *           allocated
 *
 * Replaces every "%XX" sequence (XX being two hexadecimal digits) found
 * in the first @len bytes of @str by the single byte it encodes. All
 * other bytes, including a '%' not followed by two hexadecimal digits,
 * are copied unchanged. No check is made that @str is a valid URI and no
 * character encoding conversion takes place.
 *
 * The output never exceeds @len bytes plus the terminating 0, so a
 * caller-supplied @target must provide at least that much room.
 *
 * Returns @target when it is non-NULL, otherwise a newly allocated
 * string; NULL is returned only in case of error.
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    const char *src;
    char *buf, *dst;
    int remaining;

    if (str == NULL)
        return(NULL);

    remaining = len;
    if (remaining <= 0)
        remaining = strlen(str);
    if (remaining < 0)
        return(NULL);

    buf = target;
    if (buf == NULL) {
        buf = (char *) xmlMallocAtomic(remaining + 1);
        if (buf == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = buf;
    while (remaining > 0) {
        /*
         * An escape needs three bytes of input left; the checks are
         * ordered so that nothing past a terminating 0 is ever read.
         */
        if ((remaining > 2) && (src[0] == '%') &&
            (is_hex(src[1])) && (is_hex(src[2]))) {
            int value = 0;
            int i;

            /* Fold the two hexadecimal digits, most significant first. */
            for (i = 1; i <= 2; i++) {
                char digit = src[i];
                int nibble = 0;

                if ((digit >= '0') && (digit <= '9'))
                    nibble = digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    nibble = (digit - 'a') + 10;
                else if ((digit >= 'A') && (digit <= 'F'))
                    nibble = (digit - 'A') + 10;
                value = value * 16 + nibble;
            }
            src += 3;
            remaining -= 3;
            /* Explicit sign change */
            *dst++ = (char) value;
        } else {
            *dst++ = *src++;
            remaining--;
        }
    }
    *dst = 0;
    return(buf);
}
1678  |  |  | 
1679  |  | /**  | 
1680  |  |  * xmlURIEscapeStr:  | 
1681  |  |  * @str:  string to escape  | 
1682  |  |  * @list: exception list string of chars not to escape  | 
1683  |  |  *  | 
1684  |  |  * This routine escapes a string to hex, ignoring reserved characters  | 
1685  |  |  * (a-z, A-Z, 0-9, "@-_.!~*'()") and the characters in the exception list.  | 
1686  |  |  *  | 
1687  |  |  * Returns a new escaped string or NULL in case of error.  | 
1688  |  |  */  | 
1689  |  | xmlChar *  | 
1690  | 112k  | xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) { | 
1691  | 112k  |     xmlChar *ret, ch;  | 
1692  | 112k  |     xmlChar *temp;  | 
1693  | 112k  |     const xmlChar *in;  | 
1694  | 112k  |     int len, out;  | 
1695  |  |  | 
1696  | 112k  |     if (str == NULL)  | 
1697  | 0  |   return(NULL);  | 
1698  | 112k  |     if (str[0] == 0)  | 
1699  | 0  |   return(xmlStrdup(str));  | 
1700  | 112k  |     len = xmlStrlen(str);  | 
1701  | 112k  |     if (!(len > 0)) return(NULL);  | 
1702  |  |  | 
1703  | 112k  |     len += 20;  | 
1704  | 112k  |     ret = (xmlChar *) xmlMallocAtomic(len);  | 
1705  | 112k  |     if (ret == NULL) { | 
1706  | 0  |         xmlURIErrMemory("escaping URI value\n"); | 
1707  | 0  |   return(NULL);  | 
1708  | 0  |     }  | 
1709  | 112k  |     in = (const xmlChar *) str;  | 
1710  | 112k  |     out = 0;  | 
1711  | 13.3M  |     while(*in != 0) { | 
1712  | 13.2M  |   if (len - out <= 3) { | 
1713  | 40.5k  |             temp = xmlSaveUriRealloc(ret, &len);  | 
1714  | 40.5k  |       if (temp == NULL) { | 
1715  | 0  |                 xmlURIErrMemory("escaping URI value\n"); | 
1716  | 0  |     xmlFree(ret);  | 
1717  | 0  |     return(NULL);  | 
1718  | 0  |       }  | 
1719  | 40.5k  |       ret = temp;  | 
1720  | 40.5k  |   }  | 
1721  |  |  | 
1722  | 13.2M  |   ch = *in;  | 
1723  |  |  | 
1724  | 13.2M  |   if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) { | 
1725  | 3.91M  |       unsigned char val;  | 
1726  | 3.91M  |       ret[out++] = '%';  | 
1727  | 3.91M  |       val = ch >> 4;  | 
1728  | 3.91M  |       if (val <= 9)  | 
1729  | 2.93M  |     ret[out++] = '0' + val;  | 
1730  | 974k  |       else  | 
1731  | 974k  |     ret[out++] = 'A' + val - 0xA;  | 
1732  | 3.91M  |       val = ch & 0xF;  | 
1733  | 3.91M  |       if (val <= 9)  | 
1734  | 2.45M  |     ret[out++] = '0' + val;  | 
1735  | 1.45M  |       else  | 
1736  | 1.45M  |     ret[out++] = 'A' + val - 0xA;  | 
1737  | 3.91M  |       in++;  | 
1738  | 9.30M  |   } else { | 
1739  | 9.30M  |       ret[out++] = *in++;  | 
1740  | 9.30M  |   }  | 
1741  |  |  | 
1742  | 13.2M  |     }  | 
1743  | 112k  |     ret[out] = 0;  | 
1744  | 112k  |     return(ret);  | 
1745  | 112k  | }  | 
1746  |  |  | 
1747  |  | /**  | 
1748  |  |  * xmlURIEscape:  | 
1749  |  |  * @str:  the string of the URI to escape  | 
1750  |  |  *  | 
1751  |  |  * Escaping routine, does not do validity checks !  | 
1752  |  |  * It will try to escape the chars needing this, but this is heuristic  | 
1753  |  |  * based it's impossible to be sure.  | 
1754  |  |  *  | 
1755  |  |  * Returns an copy of the string, but escaped  | 
1756  |  |  *  | 
1757  |  |  * 25 May 2001  | 
1758  |  |  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly  | 
1759  |  |  * according to RFC2396.  | 
1760  |  |  *   - Carl Douglas  | 
1761  |  |  */  | 
1762  |  | xmlChar *  | 
1763  |  | xmlURIEscape(const xmlChar * str)  | 
1764  | 0  | { | 
1765  | 0  |     xmlChar *ret, *segment = NULL;  | 
1766  | 0  |     xmlURIPtr uri;  | 
1767  | 0  |     int ret2;  | 
1768  |  | 
  | 
1769  | 0  |     if (str == NULL)  | 
1770  | 0  |         return (NULL);  | 
1771  |  |  | 
1772  | 0  |     uri = xmlCreateURI();  | 
1773  | 0  |     if (uri != NULL) { | 
1774  |  |   /*  | 
1775  |  |    * Allow escaping errors in the unescaped form  | 
1776  |  |    */  | 
1777  | 0  |         uri->cleanup = 1;  | 
1778  | 0  |         ret2 = xmlParseURIReference(uri, (const char *)str);  | 
1779  | 0  |         if (ret2) { | 
1780  | 0  |             xmlFreeURI(uri);  | 
1781  | 0  |             return (NULL);  | 
1782  | 0  |         }  | 
1783  | 0  |     }  | 
1784  |  |  | 
1785  | 0  |     if (!uri)  | 
1786  | 0  |         return NULL;  | 
1787  |  |  | 
1788  | 0  |     ret = NULL;  | 
1789  |  | 
  | 
1790  | 0  | #define NULLCHK(p) if(!p) { \ | 
1791  | 0  |          xmlURIErrMemory("escaping URI value\n"); \ | 
1792  | 0  |          xmlFreeURI(uri); \  | 
1793  | 0  |          xmlFree(ret); \  | 
1794  | 0  |          return NULL; } \  | 
1795  | 0  | 
  | 
1796  | 0  |     if (uri->scheme) { | 
1797  | 0  |         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");  | 
1798  | 0  |         NULLCHK(segment)  | 
1799  | 0  |         ret = xmlStrcat(ret, segment);  | 
1800  | 0  |         ret = xmlStrcat(ret, BAD_CAST ":");  | 
1801  | 0  |         xmlFree(segment);  | 
1802  | 0  |     }  | 
1803  |  |  | 
1804  | 0  |     if (uri->authority) { | 
1805  | 0  |         segment =  | 
1806  | 0  |             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");  | 
1807  | 0  |         NULLCHK(segment)  | 
1808  | 0  |         ret = xmlStrcat(ret, BAD_CAST "//");  | 
1809  | 0  |         ret = xmlStrcat(ret, segment);  | 
1810  | 0  |         xmlFree(segment);  | 
1811  | 0  |     }  | 
1812  |  |  | 
1813  | 0  |     if (uri->user) { | 
1814  | 0  |         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");  | 
1815  | 0  |         NULLCHK(segment)  | 
1816  | 0  |         ret = xmlStrcat(ret,BAD_CAST "//");  | 
1817  | 0  |         ret = xmlStrcat(ret, segment);  | 
1818  | 0  |         ret = xmlStrcat(ret, BAD_CAST "@");  | 
1819  | 0  |         xmlFree(segment);  | 
1820  | 0  |     }  | 
1821  |  |  | 
1822  | 0  |     if (uri->server) { | 
1823  | 0  |         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");  | 
1824  | 0  |         NULLCHK(segment)  | 
1825  | 0  |         if (uri->user == NULL)  | 
1826  | 0  |             ret = xmlStrcat(ret, BAD_CAST "//");  | 
1827  | 0  |         ret = xmlStrcat(ret, segment);  | 
1828  | 0  |         xmlFree(segment);  | 
1829  | 0  |     }  | 
1830  |  |  | 
1831  | 0  |     if (uri->port > 0) { | 
1832  | 0  |         xmlChar port[11];  | 
1833  |  | 
  | 
1834  | 0  |         snprintf((char *) port, 11, "%d", uri->port);  | 
1835  | 0  |         ret = xmlStrcat(ret, BAD_CAST ":");  | 
1836  | 0  |         ret = xmlStrcat(ret, port);  | 
1837  | 0  |     }  | 
1838  |  | 
  | 
1839  | 0  |     if (uri->path) { | 
1840  | 0  |         segment =  | 
1841  | 0  |             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");  | 
1842  | 0  |         NULLCHK(segment)  | 
1843  | 0  |         ret = xmlStrcat(ret, segment);  | 
1844  | 0  |         xmlFree(segment);  | 
1845  | 0  |     }  | 
1846  |  |  | 
1847  | 0  |     if (uri->query_raw) { | 
1848  | 0  |         ret = xmlStrcat(ret, BAD_CAST "?");  | 
1849  | 0  |         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);  | 
1850  | 0  |     }  | 
1851  | 0  |     else if (uri->query) { | 
1852  | 0  |         segment =  | 
1853  | 0  |             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");  | 
1854  | 0  |         NULLCHK(segment)  | 
1855  | 0  |         ret = xmlStrcat(ret, BAD_CAST "?");  | 
1856  | 0  |         ret = xmlStrcat(ret, segment);  | 
1857  | 0  |         xmlFree(segment);  | 
1858  | 0  |     }  | 
1859  |  |  | 
1860  | 0  |     if (uri->opaque) { | 
1861  | 0  |         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");  | 
1862  | 0  |         NULLCHK(segment)  | 
1863  | 0  |         ret = xmlStrcat(ret, segment);  | 
1864  | 0  |         xmlFree(segment);  | 
1865  | 0  |     }  | 
1866  |  |  | 
1867  | 0  |     if (uri->fragment) { | 
1868  | 0  |         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");  | 
1869  | 0  |         NULLCHK(segment)  | 
1870  | 0  |         ret = xmlStrcat(ret, BAD_CAST "#");  | 
1871  | 0  |         ret = xmlStrcat(ret, segment);  | 
1872  | 0  |         xmlFree(segment);  | 
1873  | 0  |     }  | 
1874  |  |  | 
1875  | 0  |     xmlFreeURI(uri);  | 
1876  | 0  | #undef NULLCHK  | 
1877  |  | 
  | 
1878  | 0  |     return (ret);  | 
1879  | 0  | }  | 
1880  |  |  | 
1881  |  | /************************************************************************  | 
1882  |  |  *                  *  | 
1883  |  |  *      Public functions        *  | 
1884  |  |  *                  *  | 
1885  |  |  ************************************************************************/  | 
1886  |  |  | 
1887  |  | /**  | 
1888  |  |  * xmlBuildURI:  | 
1889  |  |  * @URI:  the URI instance found in the document  | 
1890  |  |  * @base:  the base value  | 
1891  |  |  *  | 
1892  |  |  * Computes he final URI of the reference done by checking that  | 
1893  |  |  * the given URI is valid, and building the final URI using the  | 
1894  |  |  * base URI. This is processed according to section 5.2 of the  | 
1895  |  |  * RFC 2396  | 
1896  |  |  *  | 
1897  |  |  * 5.2. Resolving Relative References to Absolute Form  | 
1898  |  |  *  | 
1899  |  |  * Returns a new URI string (to be freed by the caller) or NULL in case  | 
1900  |  |  *         of error.  | 
1901  |  |  */  | 
1902  |  | xmlChar *  | 
1903  | 1.32M  | xmlBuildURI(const xmlChar *URI, const xmlChar *base) { | 
1904  | 1.32M  |     xmlChar *val = NULL;  | 
1905  | 1.32M  |     int ret, len, indx, cur, out;  | 
1906  | 1.32M  |     xmlURIPtr ref = NULL;  | 
1907  | 1.32M  |     xmlURIPtr bas = NULL;  | 
1908  | 1.32M  |     xmlURIPtr res = NULL;  | 
1909  |  |  | 
1910  |  |     /*  | 
1911  |  |      * 1) The URI reference is parsed into the potential four components and  | 
1912  |  |      *    fragment identifier, as described in Section 4.3.  | 
1913  |  |      *  | 
1914  |  |      *    NOTE that a completely empty URI is treated by modern browsers  | 
1915  |  |      *    as a reference to "." rather than as a synonym for the current  | 
1916  |  |      *    URI.  Should we do that here?  | 
1917  |  |      */  | 
1918  | 1.32M  |     if (URI == NULL)  | 
1919  | 653  |   ret = -1;  | 
1920  | 1.32M  |     else { | 
1921  | 1.32M  |   if (*URI) { | 
1922  | 1.31M  |       ref = xmlCreateURI();  | 
1923  | 1.31M  |       if (ref == NULL)  | 
1924  | 0  |     goto done;  | 
1925  | 1.31M  |       ret = xmlParseURIReference(ref, (const char *) URI);  | 
1926  | 1.31M  |   }  | 
1927  | 4.48k  |   else  | 
1928  | 4.48k  |       ret = 0;  | 
1929  | 1.32M  |     }  | 
1930  | 1.32M  |     if (ret != 0)  | 
1931  | 26.2k  |   goto done;  | 
1932  | 1.29M  |     if ((ref != NULL) && (ref->scheme != NULL)) { | 
1933  |  |   /*  | 
1934  |  |    * The URI is absolute don't modify.  | 
1935  |  |    */  | 
1936  | 76.6k  |   val = xmlStrdup(URI);  | 
1937  | 76.6k  |   goto done;  | 
1938  | 76.6k  |     }  | 
1939  | 1.22M  |     if (base == NULL)  | 
1940  | 932k  |   ret = -1;  | 
1941  | 289k  |     else { | 
1942  | 289k  |   bas = xmlCreateURI();  | 
1943  | 289k  |   if (bas == NULL)  | 
1944  | 0  |       goto done;  | 
1945  | 289k  |   ret = xmlParseURIReference(bas, (const char *) base);  | 
1946  | 289k  |     }  | 
1947  | 1.22M  |     if (ret != 0) { | 
1948  | 1.07M  |   if (ref)  | 
1949  | 1.07M  |       val = xmlSaveUri(ref);  | 
1950  | 1.07M  |   goto done;  | 
1951  | 1.07M  |     }  | 
1952  | 144k  |     if (ref == NULL) { | 
1953  |  |   /*  | 
1954  |  |    * the base fragment must be ignored  | 
1955  |  |    */  | 
1956  | 2.19k  |   if (bas->fragment != NULL) { | 
1957  | 173  |       xmlFree(bas->fragment);  | 
1958  | 173  |       bas->fragment = NULL;  | 
1959  | 173  |   }  | 
1960  | 2.19k  |   val = xmlSaveUri(bas);  | 
1961  | 2.19k  |   goto done;  | 
1962  | 2.19k  |     }  | 
1963  |  |  | 
1964  |  |     /*  | 
1965  |  |      * 2) If the path component is empty and the scheme, authority, and  | 
1966  |  |      *    query components are undefined, then it is a reference to the  | 
1967  |  |      *    current document and we are done.  Otherwise, the reference URI's  | 
1968  |  |      *    query and fragment components are defined as found (or not found)  | 
1969  |  |      *    within the URI reference and not inherited from the base URI.  | 
1970  |  |      *  | 
1971  |  |      *    NOTE that in modern browsers, the parsing differs from the above  | 
1972  |  |      *    in the following aspect:  the query component is allowed to be  | 
1973  |  |      *    defined while still treating this as a reference to the current  | 
1974  |  |      *    document.  | 
1975  |  |      */  | 
1976  | 142k  |     res = xmlCreateURI();  | 
1977  | 142k  |     if (res == NULL)  | 
1978  | 0  |   goto done;  | 
1979  | 142k  |     if ((ref->scheme == NULL) && (ref->path == NULL) &&  | 
1980  | 142k  |   ((ref->authority == NULL) && (ref->server == NULL) &&  | 
1981  | 2.86k  |          (ref->port == PORT_EMPTY))) { | 
1982  | 2.52k  |   if (bas->scheme != NULL)  | 
1983  | 1.30k  |       res->scheme = xmlMemStrdup(bas->scheme);  | 
1984  | 2.52k  |   if (bas->authority != NULL)  | 
1985  | 0  |       res->authority = xmlMemStrdup(bas->authority);  | 
1986  | 2.52k  |   else { | 
1987  | 2.52k  |       if (bas->server != NULL)  | 
1988  | 786  |     res->server = xmlMemStrdup(bas->server);  | 
1989  | 2.52k  |       if (bas->user != NULL)  | 
1990  | 694  |     res->user = xmlMemStrdup(bas->user);  | 
1991  | 2.52k  |       res->port = bas->port;  | 
1992  | 2.52k  |   }  | 
1993  | 2.52k  |   if (bas->path != NULL)  | 
1994  | 1.29k  |       res->path = xmlMemStrdup(bas->path);  | 
1995  | 2.52k  |   if (ref->query_raw != NULL)  | 
1996  | 1.33k  |       res->query_raw = xmlMemStrdup (ref->query_raw);  | 
1997  | 1.18k  |   else if (ref->query != NULL)  | 
1998  | 0  |       res->query = xmlMemStrdup(ref->query);  | 
1999  | 1.18k  |   else if (bas->query_raw != NULL)  | 
2000  | 230  |       res->query_raw = xmlMemStrdup(bas->query_raw);  | 
2001  | 957  |   else if (bas->query != NULL)  | 
2002  | 0  |       res->query = xmlMemStrdup(bas->query);  | 
2003  | 2.52k  |   if (ref->fragment != NULL)  | 
2004  | 1.60k  |       res->fragment = xmlMemStrdup(ref->fragment);  | 
2005  | 2.52k  |   goto step_7;  | 
2006  | 2.52k  |     }  | 
2007  |  |  | 
2008  |  |     /*  | 
2009  |  |      * 3) If the scheme component is defined, indicating that the reference  | 
2010  |  |      *    starts with a scheme name, then the reference is interpreted as an  | 
2011  |  |      *    absolute URI and we are done.  Otherwise, the reference URI's  | 
2012  |  |      *    scheme is inherited from the base URI's scheme component.  | 
2013  |  |      */  | 
2014  | 140k  |     if (ref->scheme != NULL) { | 
2015  | 0  |   val = xmlSaveUri(ref);  | 
2016  | 0  |   goto done;  | 
2017  | 0  |     }  | 
2018  | 140k  |     if (bas->scheme != NULL)  | 
2019  | 64.1k  |   res->scheme = xmlMemStrdup(bas->scheme);  | 
2020  |  |  | 
2021  | 140k  |     if (ref->query_raw != NULL)  | 
2022  | 1.16k  |   res->query_raw = xmlMemStrdup(ref->query_raw);  | 
2023  | 139k  |     else if (ref->query != NULL)  | 
2024  | 0  |   res->query = xmlMemStrdup(ref->query);  | 
2025  | 140k  |     if (ref->fragment != NULL)  | 
2026  | 3.27k  |   res->fragment = xmlMemStrdup(ref->fragment);  | 
2027  |  |  | 
2028  |  |     /*  | 
2029  |  |      * 4) If the authority component is defined, then the reference is a  | 
2030  |  |      *    network-path and we skip to step 7.  Otherwise, the reference  | 
2031  |  |      *    URI's authority is inherited from the base URI's authority  | 
2032  |  |      *    component, which will also be undefined if the URI scheme does not  | 
2033  |  |      *    use an authority component.  | 
2034  |  |      */  | 
2035  | 140k  |     if ((ref->authority != NULL) || (ref->server != NULL) ||  | 
2036  | 140k  |          (ref->port != PORT_EMPTY)) { | 
2037  | 500  |   if (ref->authority != NULL)  | 
2038  | 0  |       res->authority = xmlMemStrdup(ref->authority);  | 
2039  | 500  |   else { | 
2040  | 500  |             if (ref->server != NULL)  | 
2041  | 494  |                 res->server = xmlMemStrdup(ref->server);  | 
2042  | 500  |       if (ref->user != NULL)  | 
2043  | 266  |     res->user = xmlMemStrdup(ref->user);  | 
2044  | 500  |             res->port = ref->port;  | 
2045  | 500  |   }  | 
2046  | 500  |   if (ref->path != NULL)  | 
2047  | 162  |       res->path = xmlMemStrdup(ref->path);  | 
2048  | 500  |   goto step_7;  | 
2049  | 500  |     }  | 
2050  | 139k  |     if (bas->authority != NULL)  | 
2051  | 0  |   res->authority = xmlMemStrdup(bas->authority);  | 
2052  | 139k  |     else if ((bas->server != NULL) || (bas->port != PORT_EMPTY)) { | 
2053  | 42.2k  |   if (bas->server != NULL)  | 
2054  | 32.1k  |       res->server = xmlMemStrdup(bas->server);  | 
2055  | 42.2k  |   if (bas->user != NULL)  | 
2056  | 9.14k  |       res->user = xmlMemStrdup(bas->user);  | 
2057  | 42.2k  |   res->port = bas->port;  | 
2058  | 42.2k  |     }  | 
2059  |  |  | 
2060  |  |     /*  | 
2061  |  |      * 5) If the path component begins with a slash character ("/"), then | 
2062  |  |      *    the reference is an absolute-path and we skip to step 7.  | 
2063  |  |      */  | 
2064  | 139k  |     if ((ref->path != NULL) && (ref->path[0] == '/')) { | 
2065  | 793  |   res->path = xmlMemStrdup(ref->path);  | 
2066  | 793  |   goto step_7;  | 
2067  | 793  |     }  | 
2068  |  |  | 
2069  |  |  | 
2070  |  |     /*  | 
2071  |  |      * 6) If this step is reached, then we are resolving a relative-path  | 
2072  |  |      *    reference.  The relative path needs to be merged with the base  | 
2073  |  |      *    URI's path.  Although there are many ways to do this, we will  | 
2074  |  |      *    describe a simple method using a separate string buffer.  | 
2075  |  |      *  | 
2076  |  |      * Allocate a buffer large enough for the result string.  | 
2077  |  |      */  | 
2078  | 138k  |     len = 2; /* extra / and 0 */  | 
2079  | 138k  |     if (ref->path != NULL)  | 
2080  | 138k  |   len += strlen(ref->path);  | 
2081  | 138k  |     if (bas->path != NULL)  | 
2082  | 75.8k  |   len += strlen(bas->path);  | 
2083  | 138k  |     res->path = (char *) xmlMallocAtomic(len);  | 
2084  | 138k  |     if (res->path == NULL) { | 
2085  | 0  |         xmlURIErrMemory("resolving URI against base\n"); | 
2086  | 0  |   goto done;  | 
2087  | 0  |     }  | 
2088  | 138k  |     res->path[0] = 0;  | 
2089  |  |  | 
2090  |  |     /*  | 
2091  |  |      * a) All but the last segment of the base URI's path component is  | 
2092  |  |      *    copied to the buffer.  In other words, any characters after the  | 
2093  |  |      *    last (right-most) slash character, if any, are excluded.  | 
2094  |  |      */  | 
2095  | 138k  |     cur = 0;  | 
2096  | 138k  |     out = 0;  | 
2097  | 138k  |     if (bas->path != NULL) { | 
2098  | 243k  |   while (bas->path[cur] != 0) { | 
2099  | 1.54M  |       while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))  | 
2100  | 1.31M  |     cur++;  | 
2101  | 236k  |       if (bas->path[cur] == 0)  | 
2102  | 68.4k  |     break;  | 
2103  |  |  | 
2104  | 167k  |       cur++;  | 
2105  | 1.04M  |       while (out < cur) { | 
2106  | 879k  |     res->path[out] = bas->path[out];  | 
2107  | 879k  |     out++;  | 
2108  | 879k  |       }  | 
2109  | 167k  |   }  | 
2110  | 75.8k  |     }  | 
2111  | 138k  |     res->path[out] = 0;  | 
2112  |  |  | 
2113  |  |     /*  | 
2114  |  |      * b) The reference's path component is appended to the buffer  | 
2115  |  |      *    string.  | 
2116  |  |      */  | 
2117  | 138k  |     if (ref->path != NULL && ref->path[0] != 0) { | 
2118  | 138k  |   indx = 0;  | 
2119  |  |   /*  | 
2120  |  |    * Ensure the path includes a '/'  | 
2121  |  |    */  | 
2122  | 138k  |   if ((out == 0) && ((bas->server != NULL) || bas->port != PORT_EMPTY))  | 
2123  | 34.3k  |       res->path[out++] = '/';  | 
2124  | 1.90M  |   while (ref->path[indx] != 0) { | 
2125  | 1.76M  |       res->path[out++] = ref->path[indx++];  | 
2126  | 1.76M  |   }  | 
2127  | 138k  |     }  | 
2128  | 138k  |     res->path[out] = 0;  | 
2129  |  |  | 
2130  |  |     /*  | 
2131  |  |      * Steps c) to h) are really path normalization steps  | 
2132  |  |      */  | 
2133  | 138k  |     xmlNormalizeURIPath(res->path);  | 
2134  |  |  | 
2135  | 142k  | step_7:  | 
2136  |  |  | 
2137  |  |     /*  | 
2138  |  |      * 7) The resulting URI components, including any inherited from the  | 
2139  |  |      *    base URI, are recombined to give the absolute form of the URI  | 
2140  |  |      *    reference.  | 
2141  |  |      */  | 
2142  | 142k  |     val = xmlSaveUri(res);  | 
2143  |  |  | 
2144  | 1.32M  | done:  | 
2145  | 1.32M  |     if (ref != NULL)  | 
2146  | 1.31M  |   xmlFreeURI(ref);  | 
2147  | 1.32M  |     if (bas != NULL)  | 
2148  | 289k  |   xmlFreeURI(bas);  | 
2149  | 1.32M  |     if (res != NULL)  | 
2150  | 142k  |   xmlFreeURI(res);  | 
2151  | 1.32M  |     return(val);  | 
2152  | 142k  | }  | 
2153  |  |  | 
2154  |  | /**  | 
2155  |  |  * xmlBuildRelativeURI:  | 
2156  |  |  * @URI:  the URI reference under consideration  | 
2157  |  |  * @base:  the base value  | 
2158  |  |  *  | 
2159  |  |  * Expresses the URI of the reference in terms relative to the  | 
2160  |  |  * base.  Some examples of this operation include:  | 
2161  |  |  *     base = "http://site1.com/docs/book1.html"  | 
2162  |  |  *        URI input                        URI returned  | 
2163  |  |  *     docs/pic1.gif                    pic1.gif  | 
2164  |  |  *     docs/img/pic1.gif                img/pic1.gif  | 
2165  |  |  *     img/pic1.gif                     ../img/pic1.gif  | 
2166  |  |  *     http://site1.com/docs/pic1.gif   pic1.gif  | 
2167  |  |  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif  | 
2168  |  |  *  | 
2169  |  |  *     base = "docs/book1.html"  | 
2170  |  |  *        URI input                        URI returned  | 
2171  |  |  *     docs/pic1.gif                    pic1.gif  | 
2172  |  |  *     docs/img/pic1.gif                img/pic1.gif  | 
2173  |  |  *     img/pic1.gif                     ../img/pic1.gif  | 
2174  |  |  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif  | 
2175  |  |  *  | 
2176  |  |  *  | 
2177  |  |  * Note: if the URI reference is really weird or complicated, it may be  | 
2178  |  |  *       worthwhile to first convert it into a "nice" one by calling  | 
2179  |  |  *       xmlBuildURI (using 'base') before calling this routine,  | 
2180  |  |  *       since this routine (for reasonable efficiency) assumes URI has  | 
2181  |  |  *       already been through some validation.  | 
2182  |  |  *  | 
2183  |  |  * Returns a new URI string (to be freed by the caller) or NULL in case  | 
2184  |  |  * of error.  | 
2185  |  |  */  | 
2186  |  | xmlChar *  | 
2187  |  | xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)  | 
2188  | 0  | { | 
2189  | 0  |     xmlChar *val = NULL;  | 
2190  | 0  |     int ret;  | 
2191  | 0  |     int ix;  | 
2192  | 0  |     int nbslash = 0;  | 
2193  | 0  |     int len;  | 
2194  | 0  |     xmlURIPtr ref = NULL;  | 
2195  | 0  |     xmlURIPtr bas = NULL;  | 
2196  | 0  |     xmlChar *bptr, *uptr, *vptr;  | 
2197  | 0  |     int remove_path = 0;  | 
2198  |  | 
  | 
2199  | 0  |     if ((URI == NULL) || (*URI == 0))  | 
2200  | 0  |   return NULL;  | 
2201  |  |  | 
2202  |  |     /*  | 
2203  |  |      * First parse URI into a standard form  | 
2204  |  |      */  | 
2205  | 0  |     ref = xmlCreateURI ();  | 
2206  | 0  |     if (ref == NULL)  | 
2207  | 0  |   return NULL;  | 
2208  |  |     /* If URI not already in "relative" form */  | 
2209  | 0  |     if (URI[0] != '.') { | 
2210  | 0  |   ret = xmlParseURIReference (ref, (const char *) URI);  | 
2211  | 0  |   if (ret != 0)  | 
2212  | 0  |       goto done;   /* Error in URI, return NULL */  | 
2213  | 0  |     } else  | 
2214  | 0  |   ref->path = (char *)xmlStrdup(URI);  | 
2215  |  |  | 
2216  |  |     /*  | 
2217  |  |      * Next parse base into the same standard form  | 
2218  |  |      */  | 
2219  | 0  |     if ((base == NULL) || (*base == 0)) { | 
2220  | 0  |   val = xmlStrdup (URI);  | 
2221  | 0  |   goto done;  | 
2222  | 0  |     }  | 
2223  | 0  |     bas = xmlCreateURI ();  | 
2224  | 0  |     if (bas == NULL)  | 
2225  | 0  |   goto done;  | 
2226  | 0  |     if (base[0] != '.') { | 
2227  | 0  |   ret = xmlParseURIReference (bas, (const char *) base);  | 
2228  | 0  |   if (ret != 0)  | 
2229  | 0  |       goto done;   /* Error in base, return NULL */  | 
2230  | 0  |     } else  | 
2231  | 0  |   bas->path = (char *)xmlStrdup(base);  | 
2232  |  |  | 
2233  |  |     /*  | 
2234  |  |      * If the scheme / server on the URI differs from the base,  | 
2235  |  |      * just return the URI  | 
2236  |  |      */  | 
2237  | 0  |     if ((ref->scheme != NULL) &&  | 
2238  | 0  |   ((bas->scheme == NULL) ||  | 
2239  | 0  |    (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||  | 
2240  | 0  |    (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)) ||  | 
2241  | 0  |          (bas->port != ref->port))) { | 
2242  | 0  |   val = xmlStrdup (URI);  | 
2243  | 0  |   goto done;  | 
2244  | 0  |     }  | 
2245  | 0  |     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) { | 
2246  | 0  |   val = xmlStrdup(BAD_CAST "");  | 
2247  | 0  |   goto done;  | 
2248  | 0  |     }  | 
2249  | 0  |     if (bas->path == NULL) { | 
2250  | 0  |   val = xmlStrdup((xmlChar *)ref->path);  | 
2251  | 0  |   goto done;  | 
2252  | 0  |     }  | 
2253  | 0  |     if (ref->path == NULL) { | 
2254  | 0  |         ref->path = (char *) "/";  | 
2255  | 0  |   remove_path = 1;  | 
2256  | 0  |     }  | 
2257  |  |  | 
2258  |  |     /*  | 
2259  |  |      * At this point (at last!) we can compare the two paths  | 
2260  |  |      *  | 
2261  |  |      * First we take care of the special case where either of the  | 
2262  |  |      * two path components may be missing (bug 316224)  | 
2263  |  |      */  | 
2264  | 0  |     bptr = (xmlChar *)bas->path;  | 
2265  | 0  |     { | 
2266  | 0  |         xmlChar *rptr = (xmlChar *) ref->path;  | 
2267  | 0  |         int pos = 0;  | 
2268  |  |  | 
2269  |  |         /*  | 
2270  |  |          * Next we compare the two strings and find where they first differ  | 
2271  |  |          */  | 
2272  | 0  |   if ((*rptr == '.') && (rptr[1] == '/'))  | 
2273  | 0  |             rptr += 2;  | 
2274  | 0  |   if ((*bptr == '.') && (bptr[1] == '/'))  | 
2275  | 0  |             bptr += 2;  | 
2276  | 0  |   else if ((*bptr == '/') && (*rptr != '/'))  | 
2277  | 0  |       bptr++;  | 
2278  | 0  |   while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))  | 
2279  | 0  |       pos++;  | 
2280  |  | 
  | 
2281  | 0  |   if (bptr[pos] == rptr[pos]) { | 
2282  | 0  |       val = xmlStrdup(BAD_CAST "");  | 
2283  | 0  |       goto done;    /* (I can't imagine why anyone would do this) */  | 
2284  | 0  |   }  | 
2285  |  |  | 
2286  |  |   /*  | 
2287  |  |    * In URI, "back up" to the last '/' encountered.  This will be the  | 
2288  |  |    * beginning of the "unique" suffix of URI  | 
2289  |  |    */  | 
2290  | 0  |   ix = pos;  | 
2291  | 0  |   for (; ix > 0; ix--) { | 
2292  | 0  |       if (rptr[ix - 1] == '/')  | 
2293  | 0  |     break;  | 
2294  | 0  |   }  | 
2295  | 0  |   uptr = (xmlChar *)&rptr[ix];  | 
2296  |  |  | 
2297  |  |   /*  | 
2298  |  |    * In base, count the number of '/' from the differing point  | 
2299  |  |    */  | 
2300  | 0  |   for (; bptr[ix] != 0; ix++) { | 
2301  | 0  |       if (bptr[ix] == '/')  | 
2302  | 0  |     nbslash++;  | 
2303  | 0  |   }  | 
2304  |  |  | 
2305  |  |   /*  | 
2306  |  |    * e.g: URI="foo/" base="foo/bar" -> "./"  | 
2307  |  |    */  | 
2308  | 0  |   if (nbslash == 0 && !uptr[0]) { | 
2309  | 0  |       val = xmlStrdup(BAD_CAST "./");  | 
2310  | 0  |       goto done;  | 
2311  | 0  |   }  | 
2312  |  |  | 
2313  | 0  |   len = xmlStrlen (uptr) + 1;  | 
2314  | 0  |     }  | 
2315  |  |  | 
2316  | 0  |     if (nbslash == 0) { | 
2317  | 0  |   if (uptr != NULL)  | 
2318  |  |       /* exception characters from xmlSaveUri */  | 
2319  | 0  |       val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");  | 
2320  | 0  |   goto done;  | 
2321  | 0  |     }  | 
2322  |  |  | 
2323  |  |     /*  | 
2324  |  |      * Allocate just enough space for the returned string -  | 
2325  |  |      * length of the remainder of the URI, plus enough space  | 
2326  |  |      * for the "../" groups, plus one for the terminator  | 
2327  |  |      */  | 
2328  | 0  |     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);  | 
2329  | 0  |     if (val == NULL) { | 
2330  | 0  |         xmlURIErrMemory("building relative URI\n"); | 
2331  | 0  |   goto done;  | 
2332  | 0  |     }  | 
2333  | 0  |     vptr = val;  | 
2334  |  |     /*  | 
2335  |  |      * Put in as many "../" as needed  | 
2336  |  |      */  | 
2337  | 0  |     for (; nbslash>0; nbslash--) { | 
2338  | 0  |   *vptr++ = '.';  | 
2339  | 0  |   *vptr++ = '.';  | 
2340  | 0  |   *vptr++ = '/';  | 
2341  | 0  |     }  | 
2342  |  |     /*  | 
2343  |  |      * Finish up with the end of the URI  | 
2344  |  |      */  | 
2345  | 0  |     if (uptr != NULL) { | 
2346  | 0  |         if ((vptr > val) && (len > 0) &&  | 
2347  | 0  |       (uptr[0] == '/') && (vptr[-1] == '/')) { | 
2348  | 0  |       memcpy (vptr, uptr + 1, len - 1);  | 
2349  | 0  |       vptr[len - 2] = 0;  | 
2350  | 0  |   } else { | 
2351  | 0  |       memcpy (vptr, uptr, len);  | 
2352  | 0  |       vptr[len - 1] = 0;  | 
2353  | 0  |   }  | 
2354  | 0  |     } else { | 
2355  | 0  |   vptr[len - 1] = 0;  | 
2356  | 0  |     }  | 
2357  |  |  | 
2358  |  |     /* escape the freshly-built path */  | 
2359  | 0  |     vptr = val;  | 
2360  |  |   /* exception characters from xmlSaveUri */  | 
2361  | 0  |     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");  | 
2362  | 0  |     xmlFree(vptr);  | 
2363  |  | 
  | 
2364  | 0  | done:  | 
2365  |  |     /*  | 
2366  |  |      * Free the working variables  | 
2367  |  |      */  | 
2368  | 0  |     if (remove_path != 0)  | 
2369  | 0  |         ref->path = NULL;  | 
2370  | 0  |     if (ref != NULL)  | 
2371  | 0  |   xmlFreeURI (ref);  | 
2372  | 0  |     if (bas != NULL)  | 
2373  | 0  |   xmlFreeURI (bas);  | 
2374  |  | 
  | 
2375  | 0  |     return val;  | 
2376  | 0  | }  | 
2377  |  |  | 
2378  |  | /**  | 
2379  |  |  * xmlCanonicPath:  | 
2380  |  |  * @path:  the resource locator in a filesystem notation  | 
2381  |  |  *  | 
2382  |  |  * Constructs a canonic path from the specified path.  | 
2383  |  |  *  | 
2384  |  |  * Returns a new canonic path, or a duplicate of the path parameter if the  | 
2385  |  |  * construction fails. The caller is responsible for freeing the memory occupied  | 
2386  |  |  * by the returned string. If there is insufficient memory available, or the  | 
2387  |  |  * argument is NULL, the function returns NULL.  | 
2388  |  |  */  | 
2389  |  | #define IS_WINDOWS_PATH(p)          \  | 
2390  |  |   ((p != NULL) &&           \  | 
2391  |  |    (((p[0] >= 'a') && (p[0] <= 'z')) ||     \  | 
2392  |  |     ((p[0] >= 'A') && (p[0] <= 'Z'))) &&      \  | 
2393  |  |    (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))  | 
2394  |  | xmlChar *  | 
2395  |  | xmlCanonicPath(const xmlChar *path)  | 
2396  | 1.94M  | { | 
2397  |  | /*  | 
2398  |  |  * For Windows implementations, additional work needs to be done to  | 
2399  |  |  * replace backslashes in pathnames with "forward slashes"  | 
2400  |  |  */  | 
2401  |  | #if defined(_WIN32)  | 
2402  |  |     int len = 0;  | 
2403  |  |     char *p = NULL;  | 
2404  |  | #endif  | 
2405  | 1.94M  |     xmlURIPtr uri;  | 
2406  | 1.94M  |     xmlChar *ret;  | 
2407  | 1.94M  |     const xmlChar *absuri;  | 
2408  |  |  | 
2409  | 1.94M  |     if (path == NULL)  | 
2410  | 0  |   return(NULL);  | 
2411  |  |  | 
2412  |  | #if defined(_WIN32)  | 
2413  |  |     /*  | 
2414  |  |      * We must not change the backslashes to slashes if the the path  | 
2415  |  |      * starts with \\?\  | 
2416  |  |      * Those paths can be up to 32k characters long.  | 
2417  |  |      * Was added specifically for OpenOffice, those paths can't be converted  | 
2418  |  |      * to URIs anyway.  | 
2419  |  |      */  | 
2420  |  |     if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&  | 
2421  |  |         (path[3] == '\\') )  | 
2422  |  |   return xmlStrdup((const xmlChar *) path);  | 
2423  |  | #endif  | 
2424  |  |  | 
2425  |  |   /* sanitize filename starting with // so it can be used as URI */  | 
2426  | 1.94M  |     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))  | 
2427  | 6.93k  |         path++;  | 
2428  |  |  | 
2429  | 1.94M  |     if ((uri = xmlParseURI((const char *) path)) != NULL) { | 
2430  | 1.34M  |   xmlFreeURI(uri);  | 
2431  | 1.34M  |   return xmlStrdup(path);  | 
2432  | 1.34M  |     }  | 
2433  |  |  | 
2434  |  |     /* Check if this is an "absolute uri" */  | 
2435  | 596k  |     absuri = xmlStrstr(path, BAD_CAST "://");  | 
2436  | 596k  |     if (absuri != NULL) { | 
2437  | 133k  |         int l, j;  | 
2438  | 133k  |   unsigned char c;  | 
2439  | 133k  |   xmlChar *escURI;  | 
2440  |  |  | 
2441  |  |         /*  | 
2442  |  |    * this looks like an URI where some parts have not been  | 
2443  |  |    * escaped leading to a parsing problem.  Check that the first  | 
2444  |  |    * part matches a protocol.  | 
2445  |  |    */  | 
2446  | 133k  |   l = absuri - path;  | 
2447  |  |   /* Bypass if first part (part before the '://') is > 20 chars */  | 
2448  | 133k  |   if ((l <= 0) || (l > 20))  | 
2449  | 6.89k  |       goto path_processing;  | 
2450  |  |   /* Bypass if any non-alpha characters are present in first part */  | 
2451  | 476k  |   for (j = 0;j < l;j++) { | 
2452  | 363k  |       c = path[j];  | 
2453  | 363k  |       if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))  | 
2454  | 13.6k  |           goto path_processing;  | 
2455  | 363k  |   }  | 
2456  |  |  | 
2457  |  |   /* Escape all except the characters specified in the supplied path */  | 
2458  | 112k  |         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");  | 
2459  | 112k  |   if (escURI != NULL) { | 
2460  |  |       /* Try parsing the escaped path */  | 
2461  | 112k  |       uri = xmlParseURI((const char *) escURI);  | 
2462  |  |       /* If successful, return the escaped string */  | 
2463  | 112k  |       if (uri != NULL) { | 
2464  | 100k  |           xmlFreeURI(uri);  | 
2465  | 100k  |     return escURI;  | 
2466  | 100k  |       }  | 
2467  | 12.1k  |             xmlFree(escURI);  | 
2468  | 12.1k  |   }  | 
2469  | 112k  |     }  | 
2470  |  |  | 
2471  | 496k  | path_processing:  | 
2472  |  | /* For Windows implementations, replace backslashes with 'forward slashes' */  | 
2473  |  | #if defined(_WIN32)  | 
2474  |  |     /*  | 
2475  |  |      * Create a URI structure  | 
2476  |  |      */  | 
2477  |  |     uri = xmlCreateURI();  | 
2478  |  |     if (uri == NULL) {    /* Guard against 'out of memory' */ | 
2479  |  |         return(NULL);  | 
2480  |  |     }  | 
2481  |  |  | 
2482  |  |     len = xmlStrlen(path);  | 
2483  |  |     if ((len > 2) && IS_WINDOWS_PATH(path)) { | 
2484  |  |         /* make the scheme 'file' */  | 
2485  |  |   uri->scheme = (char *) xmlStrdup(BAD_CAST "file");  | 
2486  |  |   /* allocate space for leading '/' + path + string terminator */  | 
2487  |  |   uri->path = xmlMallocAtomic(len + 2);  | 
2488  |  |   if (uri->path == NULL) { | 
2489  |  |       xmlFreeURI(uri);  /* Guard against 'out of memory' */  | 
2490  |  |       return(NULL);  | 
2491  |  |   }  | 
2492  |  |   /* Put in leading '/' plus path */  | 
2493  |  |   uri->path[0] = '/';  | 
2494  |  |   p = uri->path + 1;  | 
2495  |  |   strncpy(p, (char *) path, len + 1);  | 
2496  |  |     } else { | 
2497  |  |   uri->path = (char *) xmlStrdup(path);  | 
2498  |  |   if (uri->path == NULL) { | 
2499  |  |       xmlFreeURI(uri);  | 
2500  |  |       return(NULL);  | 
2501  |  |   }  | 
2502  |  |   p = uri->path;  | 
2503  |  |     }  | 
2504  |  |     /* Now change all occurrences of '\' to '/' */  | 
2505  |  |     while (*p != '\0') { | 
2506  |  |   if (*p == '\\')  | 
2507  |  |       *p = '/';  | 
2508  |  |   p++;  | 
2509  |  |     }  | 
2510  |  |  | 
2511  |  |     if (uri->scheme == NULL) { | 
2512  |  |   ret = xmlStrdup((const xmlChar *) uri->path);  | 
2513  |  |     } else { | 
2514  |  |   ret = xmlSaveUri(uri);  | 
2515  |  |     }  | 
2516  |  |  | 
2517  |  |     xmlFreeURI(uri);  | 
2518  |  | #else  | 
2519  | 496k  |     ret = xmlStrdup((const xmlChar *) path);  | 
2520  | 496k  | #endif  | 
2521  | 496k  |     return(ret);  | 
2522  | 596k  | }  | 
2523  |  |  | 
2524  |  | /**  | 
2525  |  |  * xmlPathToURI:  | 
2526  |  |  * @path:  the resource locator in a filesystem notation  | 
2527  |  |  *  | 
2528  |  |  * Constructs an URI expressing the existing path  | 
2529  |  |  *  | 
2530  |  |  * Returns a new URI, or a duplicate of the path parameter if the  | 
2531  |  |  * construction fails. The caller is responsible for freeing the memory  | 
2532  |  |  * occupied by the returned string. If there is insufficient memory available,  | 
2533  |  |  * or the argument is NULL, the function returns NULL.  | 
2534  |  |  */  | 
2535  |  | xmlChar *  | 
2536  |  | xmlPathToURI(const xmlChar *path)  | 
2537  | 614k  | { | 
2538  | 614k  |     xmlURIPtr uri;  | 
2539  | 614k  |     xmlURI temp;  | 
2540  | 614k  |     xmlChar *ret, *cal;  | 
2541  |  |  | 
2542  | 614k  |     if (path == NULL)  | 
2543  | 0  |         return(NULL);  | 
2544  |  |  | 
2545  | 614k  |     if ((uri = xmlParseURI((const char *) path)) != NULL) { | 
2546  | 260k  |   xmlFreeURI(uri);  | 
2547  | 260k  |   return xmlStrdup(path);  | 
2548  | 260k  |     }  | 
2549  | 354k  |     cal = xmlCanonicPath(path);  | 
2550  | 354k  |     if (cal == NULL)  | 
2551  | 0  |         return(NULL);  | 
2552  |  | #if defined(_WIN32)  | 
2553  |  |     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)  | 
2554  |  |        If 'cal' is a valid URI already then we are done here, as continuing would make  | 
2555  |  |        it invalid. */  | 
2556  |  |     if ((uri = xmlParseURI((const char *) cal)) != NULL) { | 
2557  |  |   xmlFreeURI(uri);  | 
2558  |  |   return cal;  | 
2559  |  |     }  | 
2560  |  |     /* 'cal' can contain a relative path with backslashes. If that is processed  | 
2561  |  |        by xmlSaveURI, they will be escaped and the external entity loader machinery  | 
2562  |  |        will fail. So convert them to slashes. Misuse 'ret' for walking. */  | 
2563  |  |     ret = cal;  | 
2564  |  |     while (*ret != '\0') { | 
2565  |  |   if (*ret == '\\')  | 
2566  |  |       *ret = '/';  | 
2567  |  |   ret++;  | 
2568  |  |     }  | 
2569  |  | #endif  | 
2570  | 354k  |     memset(&temp, 0, sizeof(temp));  | 
2571  | 354k  |     temp.path = (char *) cal;  | 
2572  | 354k  |     ret = xmlSaveUri(&temp);  | 
2573  | 354k  |     xmlFree(cal);  | 
2574  | 354k  |     return(ret);  | 
2575  | 354k  | }  |