Coverage Report

Created: 2026-05-16 06:55

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/wget2/libwget/metalink.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2012 Tim Ruehsen
3
 * Copyright (c) 2015-2026 Free Software Foundation, Inc.
4
 *
5
 * This file is part of libwget.
6
 *
7
 * Libwget is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Lesser General Public License as published by
9
 * the Free Software Foundation, either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * Libwget is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public License
18
 * along with libwget.  If not, see <https://www.gnu.org/licenses/>.
19
 *
20
 *
21
 * Metalink parsing routines
22
 *
23
 * Changelog
24
 * 10.07.2012  Tim Ruehsen  created (refactored from wget.c)
25
 *
26
 * Resources:
27
 * RFC 5854 - The Metalink Download Description Format
28
 * RFC 6249 Metalink/HTTP: Mirrors and Hashes
29
 * RFC 5988 Link HTTP Header update
30
 * RFC 3864 Link HTTP Header
31
 * RFC 3230 Digest HTTP Header
32
 *
33
 * Some examples to test:
34
 * http://go-oo.mirrorbrain.org/stable/linux-x86/3.2.1/ooobasis3.2-af-calc-3.2.1-9505.i586.rpm
35
 * http://go-oo.mirrorbrain.org/stable/linux-x86/3.2.1/ooobasis3.2-ar-help-3.2.1-9505.i586.rpm
36
 * http://download.services.openoffice.org/files/stable/
37
 * http://go-oo.mirrorbrain.org/evolution/stable/Evolution-2.24.0.exe
38
 */
39
40
#include <config.h>
41
42
#include <stdio.h>
43
#include <stdlib.h>
44
#include <string.h>
45
#include <limits.h>
46
47
#include <wget.h>
48
#include "private.h"
49
#include "filename.h"
50
51
typedef struct {
52
  wget_metalink
53
    *metalink;
54
  int
55
    priority;
56
//    id; // counting piece number in metalink 3
57
  char
58
    hash[128],
59
    hash_type[16],
60
    location[8];
61
  long long
62
    length;
63
} metalink_context ;
64
65
static void mirror_free(void *mirror)
66
38.1k
{
67
38.1k
  wget_metalink_mirror *m = mirror;
68
69
38.1k
  if (m) {
70
38.1k
    wget_iri_free((wget_iri **) &m->iri);
71
38.1k
    xfree(m);
72
38.1k
  }
73
38.1k
}
74
75
static void add_piece(metalink_context *ctx, const char *value)
76
6.38k
{
77
6.38k
  wget_metalink *metalink = ctx->metalink;
78
79
6.38k
  sscanf(value, "%127s", ctx->hash);
80
81
6.38k
  if (ctx->length && *ctx->hash_type && *ctx->hash) {
82
    // hash for a piece of the file
83
5.52k
    wget_metalink_piece piece, *piecep;
84
85
5.52k
    if (!metalink->pieces)
86
230
      metalink->pieces = wget_vector_create(32, NULL);
87
88
5.52k
    piece.length = ctx->length;
89
5.52k
    wget_strscpy(piece.hash.type, ctx->hash_type, sizeof(piece.hash.type));
90
5.52k
    wget_strscpy(piece.hash.hash_hex, ctx->hash, sizeof(piece.hash.hash_hex));
91
92
5.52k
    piecep = wget_vector_get(metalink->pieces, wget_vector_size(metalink->pieces) - 1);
93
5.52k
    if (piecep && piecep->length > 0) {
94
1.44k
      if (piecep->position <= LONG_MAX - piecep->length)
95
1.19k
        piece.position = piecep->position + piecep->length;
96
255
      else
97
255
        piece.position = 0; // integer overflow
98
1.44k
    } else
99
4.07k
      piece.position = 0;
100
5.52k
    wget_vector_add_memdup(metalink->pieces, &piece, sizeof(wget_metalink_piece));
101
5.52k
  }
102
103
6.38k
  *ctx->hash = 0;
104
6.38k
}
105
106
static void add_file_hash(metalink_context *ctx, const char *value)
107
713
{
108
713
  wget_metalink *metalink = ctx->metalink;
109
110
713
  sscanf(value, "%127s", ctx->hash);
111
112
713
  if (*ctx->hash_type && *ctx->hash) {
113
    // hashes for the complete file
114
138
    wget_metalink_hash hash = { 0 };
115
116
138
    wget_strscpy(hash.type, ctx->hash_type, sizeof(hash.type));
117
138
    wget_strscpy(hash.hash_hex, ctx->hash, sizeof(hash.hash_hex));
118
119
138
    if (!metalink->hashes)
120
24
      metalink->hashes = wget_vector_create(4, NULL);
121
138
    wget_vector_add_memdup(metalink->hashes, &hash, sizeof(wget_metalink_hash));
122
138
  }
123
124
713
  *ctx->hash_type = *ctx->hash = 0;
125
713
}
126
127
static void add_mirror(metalink_context *ctx, const char *value)
128
40.5k
{
129
40.5k
  wget_iri *iri = wget_iri_parse(value, NULL);
130
131
40.5k
  if (!iri)
132
2.38k
    return;
133
134
38.1k
  if (!wget_iri_supported(iri)) {
135
0
    error_printf(_("Mirror scheme not supported: '%s'\n"), value);
136
0
    wget_iri_free(&iri);
137
0
    return;
138
0
  }
139
140
/*  if (iri->scheme == WGET_IRI_SCHEME_HTTP)
141
    test_modify_hsts(iri);
142
143
  if (config.https_only && iri->scheme != WGET_IRI_SCHEME_HTTPS) {
144
    info_printf(_("Mirror '%s' dropped (https-only requested)\n"), value);
145
    wget_iri_free(&iri);
146
    return;
147
  }
148
149
  if (iri->scheme == WGET_IRI_SCHEME_HTTP && config.https_enforce) {
150
    wget_iri_set_scheme(iri, WGET_IRI_SCHEME_HTTPS);
151
  }
152
*/
153
154
38.1k
  wget_metalink *metalink = ctx->metalink;
155
38.1k
  wget_metalink_mirror *mirror = wget_calloc(1, sizeof(wget_metalink_mirror));
156
157
38.1k
  if (mirror) {
158
38.1k
    wget_strscpy(mirror->location, ctx->location, sizeof(mirror->location));
159
38.1k
    mirror->priority = ctx->priority;
160
38.1k
    mirror->iri = iri;
161
162
38.1k
    if (!metalink->mirrors) {
163
3.76k
      metalink->mirrors = wget_vector_create(4, NULL);
164
3.76k
      wget_vector_set_destructor(metalink->mirrors, mirror_free);
165
3.76k
    }
166
38.1k
    wget_vector_add(metalink->mirrors, mirror);
167
38.1k
  }
168
169
38.1k
  *ctx->location = 0;
170
38.1k
  ctx->priority = 999999;
171
38.1k
}
172
173
static const char *sanitized_filename(const char *in)
174
1.07k
{
175
  // RFC 5854:
176
  //   The path MUST NOT contain any directory traversal
177
  //   directives or information.  The path MUST be relative.  The path
178
  //   MUST NOT begin with a "/", "./", or "../"; contain "/../"; or end
179
  //   with "/..".
180
1.07k
  const char *p = in + FILE_SYSTEM_PREFIX_LEN(in); // skip drive letter on Windows
181
182
  // Reject absolute paths (leading "/" or "\\")
183
1.07k
  if (ISSLASH(*p))
184
384
    return NULL;
185
186
  // Reject "../" at the start
187
693
  if (!strncmp(p, "./", 2) || !strncmp(p, "../", 3))
188
388
    return NULL;
189
190
  // Reject "/../" anywhere in the path
191
305
  if (strstr(p, "/../"))
192
195
    return NULL;
193
194
  // Reject trailing "/.."
195
110
  if (wget_match_tail(p, "/.."))
196
0
    return NULL;
197
198
#ifdef WIN32
199
  // Reject "..\\" at the start
200
  if (!strncmp(p, ".\\", 2) || !strncmp(p, "..\\", 3))
201
    return NULL;
202
203
  // Reject "\\../" or "\\..\\" or "/..\\" anywhere in the path
204
  if (strstr(p, "\\../") || strstr(p, "\\..\\") || strstr(p, "/..\\"))
205
    return NULL;
206
207
  // Reject trailing "\\.."
208
  if (wget_match_tail(p, "\\.."))
209
    return NULL;
210
#endif
211
212
110
  return wget_strdup(in);
213
110
}
214
215
static void metalink_parse(void *context, int flags, const char *dir, const char *attr, const char *val, size_t len, size_t pos WGET_GCC_UNUSED)
216
152k
{
217
152k
  metalink_context *ctx = context;
218
152k
  char valuebuf[1024];
219
152k
  const char *value;
220
221
  // info_printf("\n%02X %s %s '%s'\n", flags, dir, attr, value);
222
152k
  if (!(flags & (XML_FLG_CONTENT | XML_FLG_ATTRIBUTE)))
223
90.6k
    return; // ignore comments
224
225
62.2k
  if (wget_strncasecmp_ascii(dir, "/metalink/file", 14))
226
5.81k
    return;
227
228
56.3k
  dir += 14;
229
230
56.3k
  if (!(value = wget_strmemcpy_a(valuebuf, sizeof(valuebuf), val ? val : "", len)))
231
0
    return;
232
233
56.3k
  if (!wget_strncasecmp_ascii(dir, "s/file", 6)) {
234
    // metalink 3 XML format
235
3.88k
    dir += 6;
236
237
3.88k
    if (attr) {
238
2.76k
      if (*dir == 0) { // /metalink/file
239
623
        if (!ctx->metalink->name && !wget_strcasecmp_ascii(attr, "name")) {
240
203
          ctx->metalink->name = sanitized_filename(value);
241
203
        }
242
2.14k
      } else if (!wget_strcasecmp_ascii(dir, "/verification/pieces")) {
243
592
        if (!wget_strcasecmp_ascii(attr, "type")) {
244
194
          sscanf(value, "%15s", ctx->hash_type);
245
398
        } else if (!wget_strcasecmp_ascii(attr, "length")) {
246
194
          ctx->length = atoll(value);
247
194
        }
248
//      } else if (!wget_strcasecmp_ascii(dir, "/verification/pieces/hash")) {
249
//        if (!wget_strcasecmp_ascii(attr, "type")) {
250
//          ctx->id = atoi(value);
251
//        }
252
1.55k
      } else if (!wget_strcasecmp_ascii(dir, "/verification/hash")) {
253
394
        if (!wget_strcasecmp_ascii(attr, "type")) {
254
194
          sscanf(value, "%15s", ctx->hash_type);
255
194
        }
256
1.15k
      } else if (!wget_strcasecmp_ascii(dir, "/resources/url")) {
257
937
        if (!wget_strcasecmp_ascii(attr, "location")) {
258
222
          sscanf(value, " %2[a-zA-Z]", ctx->location); // ISO 3166-1 alpha-2 two letter country code
259
//        } else if (!wget_strcasecmp_ascii(attr, "protocol")) {
260
//          sscanf(value, " %7[a-zA-Z]", ctx->protocol); // type of URL, e.g. HTTP, HTTPS, FTP, ...
261
//        } else if (!wget_strcasecmp_ascii(attr, "type")) {
262
//          sscanf(value, " %2[a-zA-Z]", ctx->type); // type of URL, e.g. HTTP, FTP, ...
263
715
        } else if (!wget_strcasecmp_ascii(attr, "preference")) {
264
443
          sscanf(value, " %6d", &ctx->priority);
265
443
          if (ctx->priority < 1 || ctx->priority > 999999)
266
214
            ctx->priority = 999999;
267
443
        }
268
937
      }
269
2.76k
    } else {
270
1.11k
      if (!wget_strcasecmp_ascii(dir, "/verification/pieces/hash")) {
271
194
        add_piece(ctx, value);
272
924
      } else if (!wget_strcasecmp_ascii(dir, "/verification/hash")) {
273
194
        add_file_hash(ctx, value);
274
730
      } else if (!wget_strcasecmp_ascii(dir, "/size")) {
275
194
        ctx->metalink->size = atoll(value);
276
536
      } else if (!wget_strcasecmp_ascii(dir, "/resources/url")) {
277
324
        add_mirror(ctx, value);
278
324
      }
279
1.11k
    }
280
52.4k
  } else {
281
    // metalink 4 XML format
282
52.4k
    if (attr) {
283
5.11k
      if (*dir == 0) { // /metalink/file
284
1.60k
        if (!ctx->metalink->name && !wget_strcasecmp_ascii(attr, "name")) {
285
874
          ctx->metalink->name = sanitized_filename(value);
286
874
        }
287
3.50k
      } else if (!wget_strcasecmp_ascii(dir, "/pieces")) {
288
1.24k
        if (!wget_strcasecmp_ascii(attr, "type")) {
289
432
          sscanf(value, "%15s", ctx->hash_type);
290
813
        } else if (!wget_strcasecmp_ascii(attr, "length")) {
291
530
          ctx->length = atoll(value);
292
530
        }
293
2.25k
      } else if (!wget_strcasecmp_ascii(dir, "/hash")) {
294
750
        if (!wget_strcasecmp_ascii(attr, "type")) {
295
519
          sscanf(value, "%15s", ctx->hash_type);
296
519
        }
297
1.50k
      } else if (!wget_strcasecmp_ascii(dir, "/url")) {
298
1.09k
        if (!wget_strcasecmp_ascii(attr, "location")) {
299
194
          sscanf(value, " %2[a-zA-Z]", ctx->location); // ISO 3166-1 alpha-2 two letter country code
300
904
        } else if (!wget_strcasecmp_ascii(attr, "priority") || !wget_strcasecmp_ascii(attr, "preference")) {
301
609
          sscanf(value, " %6d", &ctx->priority);
302
609
          if (ctx->priority < 1 || ctx->priority > 999999)
303
198
            ctx->priority = 999999;
304
609
        }
305
1.09k
      }
306
47.3k
    } else {
307
47.3k
      if (!wget_strcasecmp_ascii(dir, "/pieces/hash")) {
308
6.19k
        add_piece(ctx, value);
309
41.1k
      } else if (!wget_strcasecmp_ascii(dir, "/hash")) {
310
519
        add_file_hash(ctx, value);
311
40.6k
      } else if (!wget_strcasecmp_ascii(dir, "/size")) {
312
194
        ctx->metalink->size = atoll(value);
313
40.4k
      } else if (!wget_strcasecmp_ascii(dir, "/url")) {
314
40.2k
        add_mirror(ctx, value);
315
40.2k
      }
316
47.3k
    }
317
52.4k
  }
318
319
56.3k
  if (value != valuebuf)
320
70
    xfree(value);
321
56.3k
}
322
323
wget_metalink *wget_metalink_parse(const char *xml)
324
6.37k
{
325
6.37k
  if (!xml)
326
0
    return NULL;
327
328
6.37k
  wget_metalink *metalink = wget_calloc(1, sizeof(wget_metalink));
329
6.37k
  metalink_context ctx = { .metalink = metalink, .priority = 999999, .location = "-" };
330
331
6.37k
  if (wget_xml_parse_buffer(xml, metalink_parse, &ctx, 0) != WGET_E_SUCCESS) {
332
429
    error_printf(_("Error in parsing XML"));
333
429
    wget_metalink_free(&metalink);
334
429
  }
335
336
6.37k
  return metalink;
337
6.37k
}
338
339
void wget_metalink_free(wget_metalink **metalink)
340
6.80k
{
341
6.80k
  if (metalink && *metalink) {
342
6.37k
    xfree((*metalink)->name);
343
6.37k
    wget_vector_free(&(*metalink)->mirrors);
344
6.37k
    wget_vector_free(&(*metalink)->hashes);
345
6.37k
    wget_vector_free(&(*metalink)->pieces);
346
6.37k
    xfree(*metalink);
347
6.37k
  }
348
6.80k
}
349
350
WGET_GCC_PURE
351
static int compare_mirror(wget_metalink_mirror *m1, wget_metalink_mirror *m2)
352
124k
{
353
124k
  return m1->priority - m2->priority;
354
124k
}
355
356
void wget_metalink_sort_mirrors(wget_metalink *metalink)
357
6.37k
{
358
6.37k
  if (metalink) {
359
5.94k
    wget_vector_setcmpfunc(metalink->mirrors, (wget_vector_compare_fn *) compare_mirror);
360
5.94k
    wget_vector_sort(metalink->mirrors);
361
5.94k
  }
362
6.37k
}