/src/wget2/libwget/metalink.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2012 Tim Ruehsen |
3 | | * Copyright (c) 2015-2026 Free Software Foundation, Inc. |
4 | | * |
5 | | * This file is part of libwget. |
6 | | * |
7 | | * Libwget is free software: you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as published by |
9 | | * the Free Software Foundation, either version 3 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * Libwget is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public License |
18 | | * along with libwget. If not, see <https://www.gnu.org/licenses/>. |
19 | | * |
20 | | * |
21 | | * Metalink parsing routines |
22 | | * |
23 | | * Changelog |
24 | | * 10.07.2012 Tim Ruehsen created (refactored from wget.c) |
25 | | * |
26 | | * Resources: |
27 | | * RFC 5854 - The Metalink Download Description Format |
28 | | * RFC 6249 Metalink/HTTP: Mirrors and Hashes |
29 | | * RFC 5988 Link HTTP Header update |
30 | | * RFC 3864 Link HTTP Header |
31 | | * RFC 3230 Digest HTTP Header |
32 | | * |
33 | | * Some examples to test: |
34 | | * http://go-oo.mirrorbrain.org/stable/linux-x86/3.2.1/ooobasis3.2-af-calc-3.2.1-9505.i586.rpm |
35 | | * http://go-oo.mirrorbrain.org/stable/linux-x86/3.2.1/ooobasis3.2-ar-help-3.2.1-9505.i586.rpm |
36 | | * http://download.services.openoffice.org/files/stable/ |
37 | | * http://go-oo.mirrorbrain.org/evolution/stable/Evolution-2.24.0.exe |
38 | | */ |
39 | | |
40 | | #include <config.h> |
41 | | |
42 | | #include <stdio.h> |
43 | | #include <stdlib.h> |
44 | | #include <string.h> |
45 | | #include <limits.h> |
46 | | |
47 | | #include <wget.h> |
48 | | #include "private.h" |
49 | | #include "filename.h" |
50 | | |
51 | | typedef struct { |
52 | | wget_metalink |
53 | | *metalink; |
54 | | int |
55 | | priority; |
56 | | // id; // counting piece number in metalink 3 |
57 | | char |
58 | | hash[128], |
59 | | hash_type[16], |
60 | | location[8]; |
61 | | long long |
62 | | length; |
63 | | } metalink_context ; |
64 | | |
65 | | static void mirror_free(void *mirror) |
66 | 38.1k | { |
67 | 38.1k | wget_metalink_mirror *m = mirror; |
68 | | |
69 | 38.1k | if (m) { |
70 | 38.1k | wget_iri_free((wget_iri **) &m->iri); |
71 | 38.1k | xfree(m); |
72 | 38.1k | } |
73 | 38.1k | } |
74 | | |
75 | | static void add_piece(metalink_context *ctx, const char *value) |
76 | 6.38k | { |
77 | 6.38k | wget_metalink *metalink = ctx->metalink; |
78 | | |
79 | 6.38k | sscanf(value, "%127s", ctx->hash); |
80 | | |
81 | 6.38k | if (ctx->length && *ctx->hash_type && *ctx->hash) { |
82 | | // hash for a piece of the file |
83 | 5.52k | wget_metalink_piece piece, *piecep; |
84 | | |
85 | 5.52k | if (!metalink->pieces) |
86 | 230 | metalink->pieces = wget_vector_create(32, NULL); |
87 | | |
88 | 5.52k | piece.length = ctx->length; |
89 | 5.52k | wget_strscpy(piece.hash.type, ctx->hash_type, sizeof(piece.hash.type)); |
90 | 5.52k | wget_strscpy(piece.hash.hash_hex, ctx->hash, sizeof(piece.hash.hash_hex)); |
91 | | |
92 | 5.52k | piecep = wget_vector_get(metalink->pieces, wget_vector_size(metalink->pieces) - 1); |
93 | 5.52k | if (piecep && piecep->length > 0) { |
94 | 1.44k | if (piecep->position <= LONG_MAX - piecep->length) |
95 | 1.19k | piece.position = piecep->position + piecep->length; |
96 | 255 | else |
97 | 255 | piece.position = 0; // integer overflow |
98 | 1.44k | } else |
99 | 4.07k | piece.position = 0; |
100 | 5.52k | wget_vector_add_memdup(metalink->pieces, &piece, sizeof(wget_metalink_piece)); |
101 | 5.52k | } |
102 | | |
103 | 6.38k | *ctx->hash = 0; |
104 | 6.38k | } |
105 | | |
106 | | static void add_file_hash(metalink_context *ctx, const char *value) |
107 | 713 | { |
108 | 713 | wget_metalink *metalink = ctx->metalink; |
109 | | |
110 | 713 | sscanf(value, "%127s", ctx->hash); |
111 | | |
112 | 713 | if (*ctx->hash_type && *ctx->hash) { |
113 | | // hashes for the complete file |
114 | 138 | wget_metalink_hash hash = { 0 }; |
115 | | |
116 | 138 | wget_strscpy(hash.type, ctx->hash_type, sizeof(hash.type)); |
117 | 138 | wget_strscpy(hash.hash_hex, ctx->hash, sizeof(hash.hash_hex)); |
118 | | |
119 | 138 | if (!metalink->hashes) |
120 | 24 | metalink->hashes = wget_vector_create(4, NULL); |
121 | 138 | wget_vector_add_memdup(metalink->hashes, &hash, sizeof(wget_metalink_hash)); |
122 | 138 | } |
123 | | |
124 | 713 | *ctx->hash_type = *ctx->hash = 0; |
125 | 713 | } |
126 | | |
127 | | static void add_mirror(metalink_context *ctx, const char *value) |
128 | 40.5k | { |
129 | 40.5k | wget_iri *iri = wget_iri_parse(value, NULL); |
130 | | |
131 | 40.5k | if (!iri) |
132 | 2.38k | return; |
133 | | |
134 | 38.1k | if (!wget_iri_supported(iri)) { |
135 | 0 | error_printf(_("Mirror scheme not supported: '%s'\n"), value); |
136 | 0 | wget_iri_free(&iri); |
137 | 0 | return; |
138 | 0 | } |
139 | | |
140 | | /* if (iri->scheme == WGET_IRI_SCHEME_HTTP) |
141 | | test_modify_hsts(iri); |
142 | | |
143 | | if (config.https_only && iri->scheme != WGET_IRI_SCHEME_HTTPS) { |
144 | | info_printf(_("Mirror '%s' dropped (https-only requested)\n"), value); |
145 | | wget_iri_free(&iri); |
146 | | return; |
147 | | } |
148 | | |
149 | | if (iri->scheme == WGET_IRI_SCHEME_HTTP && config.https_enforce) { |
150 | | wget_iri_set_scheme(iri, WGET_IRI_SCHEME_HTTPS); |
151 | | } |
152 | | */ |
153 | | |
154 | 38.1k | wget_metalink *metalink = ctx->metalink; |
155 | 38.1k | wget_metalink_mirror *mirror = wget_calloc(1, sizeof(wget_metalink_mirror)); |
156 | | |
157 | 38.1k | if (mirror) { |
158 | 38.1k | wget_strscpy(mirror->location, ctx->location, sizeof(mirror->location)); |
159 | 38.1k | mirror->priority = ctx->priority; |
160 | 38.1k | mirror->iri = iri; |
161 | | |
162 | 38.1k | if (!metalink->mirrors) { |
163 | 3.76k | metalink->mirrors = wget_vector_create(4, NULL); |
164 | 3.76k | wget_vector_set_destructor(metalink->mirrors, mirror_free); |
165 | 3.76k | } |
166 | 38.1k | wget_vector_add(metalink->mirrors, mirror); |
167 | 38.1k | } |
168 | | |
169 | 38.1k | *ctx->location = 0; |
170 | 38.1k | ctx->priority = 999999; |
171 | 38.1k | } |
172 | | |
/*
 * Validate a file name taken from a Metalink document.
 *
 * RFC 5854:
 *   The path MUST NOT contain any directory traversal
 *   directives or information. The path MUST be relative. The path
 *   MUST NOT begin with a "/", "./", or "../"; contain "/../"; or end
 *   with "/..".
 *
 * in  file name as found in the document
 *
 * Returns a newly allocated copy of 'in' if it passes all checks, else NULL.
 */
static const char *sanitized_filename(const char *in)
{
	const char *p = in + FILE_SYSTEM_PREFIX_LEN(in); // skip drive letter on Windows

	// Reject absolute paths (leading "/" or "\\")
	if (ISSLASH(*p))
		return NULL;

	// Reject a bare "." or "..": pure traversal directives that cannot name a file
	if (!strcmp(p, ".") || !strcmp(p, ".."))
		return NULL;

	// Reject "./" or "../" at the start
	if (!strncmp(p, "./", 2) || !strncmp(p, "../", 3))
		return NULL;

	// Reject "/../" anywhere in the path
	if (strstr(p, "/../"))
		return NULL;

	// Reject trailing "/.."
	if (wget_match_tail(p, "/.."))
		return NULL;

#ifdef WIN32
	// Reject ".\\" or "..\\" at the start
	if (!strncmp(p, ".\\", 2) || !strncmp(p, "..\\", 3))
		return NULL;

	// Reject "\\../" or "\\..\\" or "/..\\" anywhere in the path
	if (strstr(p, "\\../") || strstr(p, "\\..\\") || strstr(p, "/..\\"))
		return NULL;

	// Reject trailing "\\.."
	if (wget_match_tail(p, "\\.."))
		return NULL;
#endif

	return wget_strdup(in);
}
214 | | |
215 | | static void metalink_parse(void *context, int flags, const char *dir, const char *attr, const char *val, size_t len, size_t pos WGET_GCC_UNUSED) |
216 | 152k | { |
217 | 152k | metalink_context *ctx = context; |
218 | 152k | char valuebuf[1024]; |
219 | 152k | const char *value; |
220 | | |
221 | | // info_printf("\n%02X %s %s '%s'\n", flags, dir, attr, value); |
222 | 152k | if (!(flags & (XML_FLG_CONTENT | XML_FLG_ATTRIBUTE))) |
223 | 90.6k | return; // ignore comments |
224 | | |
225 | 62.2k | if (wget_strncasecmp_ascii(dir, "/metalink/file", 14)) |
226 | 5.81k | return; |
227 | | |
228 | 56.3k | dir += 14; |
229 | | |
230 | 56.3k | if (!(value = wget_strmemcpy_a(valuebuf, sizeof(valuebuf), val ? val : "", len))) |
231 | 0 | return; |
232 | | |
233 | 56.3k | if (!wget_strncasecmp_ascii(dir, "s/file", 6)) { |
234 | | // metalink 3 XML format |
235 | 3.88k | dir += 6; |
236 | | |
237 | 3.88k | if (attr) { |
238 | 2.76k | if (*dir == 0) { // /metalink/file |
239 | 623 | if (!ctx->metalink->name && !wget_strcasecmp_ascii(attr, "name")) { |
240 | 203 | ctx->metalink->name = sanitized_filename(value); |
241 | 203 | } |
242 | 2.14k | } else if (!wget_strcasecmp_ascii(dir, "/verification/pieces")) { |
243 | 592 | if (!wget_strcasecmp_ascii(attr, "type")) { |
244 | 194 | sscanf(value, "%15s", ctx->hash_type); |
245 | 398 | } else if (!wget_strcasecmp_ascii(attr, "length")) { |
246 | 194 | ctx->length = atoll(value); |
247 | 194 | } |
248 | | // } else if (!wget_strcasecmp_ascii(dir, "/verification/pieces/hash")) { |
249 | | // if (!wget_strcasecmp_ascii(attr, "type")) { |
250 | | // ctx->id = atoi(value); |
251 | | // } |
252 | 1.55k | } else if (!wget_strcasecmp_ascii(dir, "/verification/hash")) { |
253 | 394 | if (!wget_strcasecmp_ascii(attr, "type")) { |
254 | 194 | sscanf(value, "%15s", ctx->hash_type); |
255 | 194 | } |
256 | 1.15k | } else if (!wget_strcasecmp_ascii(dir, "/resources/url")) { |
257 | 937 | if (!wget_strcasecmp_ascii(attr, "location")) { |
258 | 222 | sscanf(value, " %2[a-zA-Z]", ctx->location); // ISO 3166-1 alpha-2 two letter country code |
259 | | // } else if (!wget_strcasecmp_ascii(attr, "protocol")) { |
260 | | // sscanf(value, " %7[a-zA-Z]", ctx->protocol); // type of URL, e.g. HTTP, HTTPS, FTP, ... |
261 | | // } else if (!wget_strcasecmp_ascii(attr, "type")) { |
262 | | // sscanf(value, " %2[a-zA-Z]", ctx->type); // type of URL, e.g. HTTP, FTP, ... |
263 | 715 | } else if (!wget_strcasecmp_ascii(attr, "preference")) { |
264 | 443 | sscanf(value, " %6d", &ctx->priority); |
265 | 443 | if (ctx->priority < 1 || ctx->priority > 999999) |
266 | 214 | ctx->priority = 999999; |
267 | 443 | } |
268 | 937 | } |
269 | 2.76k | } else { |
270 | 1.11k | if (!wget_strcasecmp_ascii(dir, "/verification/pieces/hash")) { |
271 | 194 | add_piece(ctx, value); |
272 | 924 | } else if (!wget_strcasecmp_ascii(dir, "/verification/hash")) { |
273 | 194 | add_file_hash(ctx, value); |
274 | 730 | } else if (!wget_strcasecmp_ascii(dir, "/size")) { |
275 | 194 | ctx->metalink->size = atoll(value); |
276 | 536 | } else if (!wget_strcasecmp_ascii(dir, "/resources/url")) { |
277 | 324 | add_mirror(ctx, value); |
278 | 324 | } |
279 | 1.11k | } |
280 | 52.4k | } else { |
281 | | // metalink 4 XML format |
282 | 52.4k | if (attr) { |
283 | 5.11k | if (*dir == 0) { // /metalink/file |
284 | 1.60k | if (!ctx->metalink->name && !wget_strcasecmp_ascii(attr, "name")) { |
285 | 874 | ctx->metalink->name = sanitized_filename(value); |
286 | 874 | } |
287 | 3.50k | } else if (!wget_strcasecmp_ascii(dir, "/pieces")) { |
288 | 1.24k | if (!wget_strcasecmp_ascii(attr, "type")) { |
289 | 432 | sscanf(value, "%15s", ctx->hash_type); |
290 | 813 | } else if (!wget_strcasecmp_ascii(attr, "length")) { |
291 | 530 | ctx->length = atoll(value); |
292 | 530 | } |
293 | 2.25k | } else if (!wget_strcasecmp_ascii(dir, "/hash")) { |
294 | 750 | if (!wget_strcasecmp_ascii(attr, "type")) { |
295 | 519 | sscanf(value, "%15s", ctx->hash_type); |
296 | 519 | } |
297 | 1.50k | } else if (!wget_strcasecmp_ascii(dir, "/url")) { |
298 | 1.09k | if (!wget_strcasecmp_ascii(attr, "location")) { |
299 | 194 | sscanf(value, " %2[a-zA-Z]", ctx->location); // ISO 3166-1 alpha-2 two letter country code |
300 | 904 | } else if (!wget_strcasecmp_ascii(attr, "priority") || !wget_strcasecmp_ascii(attr, "preference")) { |
301 | 609 | sscanf(value, " %6d", &ctx->priority); |
302 | 609 | if (ctx->priority < 1 || ctx->priority > 999999) |
303 | 198 | ctx->priority = 999999; |
304 | 609 | } |
305 | 1.09k | } |
306 | 47.3k | } else { |
307 | 47.3k | if (!wget_strcasecmp_ascii(dir, "/pieces/hash")) { |
308 | 6.19k | add_piece(ctx, value); |
309 | 41.1k | } else if (!wget_strcasecmp_ascii(dir, "/hash")) { |
310 | 519 | add_file_hash(ctx, value); |
311 | 40.6k | } else if (!wget_strcasecmp_ascii(dir, "/size")) { |
312 | 194 | ctx->metalink->size = atoll(value); |
313 | 40.4k | } else if (!wget_strcasecmp_ascii(dir, "/url")) { |
314 | 40.2k | add_mirror(ctx, value); |
315 | 40.2k | } |
316 | 47.3k | } |
317 | 52.4k | } |
318 | | |
319 | 56.3k | if (value != valuebuf) |
320 | 70 | xfree(value); |
321 | 56.3k | } |
322 | | |
323 | | wget_metalink *wget_metalink_parse(const char *xml) |
324 | 6.37k | { |
325 | 6.37k | if (!xml) |
326 | 0 | return NULL; |
327 | | |
328 | 6.37k | wget_metalink *metalink = wget_calloc(1, sizeof(wget_metalink)); |
329 | 6.37k | metalink_context ctx = { .metalink = metalink, .priority = 999999, .location = "-" }; |
330 | | |
331 | 6.37k | if (wget_xml_parse_buffer(xml, metalink_parse, &ctx, 0) != WGET_E_SUCCESS) { |
332 | 429 | error_printf(_("Error in parsing XML")); |
333 | 429 | wget_metalink_free(&metalink); |
334 | 429 | } |
335 | | |
336 | 6.37k | return metalink; |
337 | 6.37k | } |
338 | | |
339 | | void wget_metalink_free(wget_metalink **metalink) |
340 | 6.80k | { |
341 | 6.80k | if (metalink && *metalink) { |
342 | 6.37k | xfree((*metalink)->name); |
343 | 6.37k | wget_vector_free(&(*metalink)->mirrors); |
344 | 6.37k | wget_vector_free(&(*metalink)->hashes); |
345 | 6.37k | wget_vector_free(&(*metalink)->pieces); |
346 | 6.37k | xfree(*metalink); |
347 | 6.37k | } |
348 | 6.80k | } |
349 | | |
350 | | WGET_GCC_PURE |
351 | | static int compare_mirror(wget_metalink_mirror *m1, wget_metalink_mirror *m2) |
352 | 124k | { |
353 | 124k | return m1->priority - m2->priority; |
354 | 124k | } |
355 | | |
356 | | void wget_metalink_sort_mirrors(wget_metalink *metalink) |
357 | 6.37k | { |
358 | 6.37k | if (metalink) { |
359 | 5.94k | wget_vector_setcmpfunc(metalink->mirrors, (wget_vector_compare_fn *) compare_mirror); |
360 | 5.94k | wget_vector_sort(metalink->mirrors); |
361 | 5.94k | } |
362 | 6.37k | } |