Coverage Report

Created: 2025-07-23 07:18

/src/wget2/libwget/decompressor.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2012 Tim Ruehsen
3
 * Copyright (c) 2015-2024 Free Software Foundation, Inc.
4
 *
5
 * This file is part of libwget.
6
 *
7
 * Libwget is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Lesser General Public License as published by
9
 * the Free Software Foundation, either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * Libwget is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public License
18
 * along with libwget.  If not, see <https://www.gnu.org/licenses/>.
19
 *
20
 *
21
 * HTTP decompression routines
22
 *
23
 * Changelog
24
 * 20.06.2012  Tim Ruehsen  created
25
 * 31.12.2013  Tim Ruehsen  added XZ / LZMA decompression
26
 * 02.01.2014  Tim Ruehsen  added BZIP2 decompression
27
 * 24.02.2017  Tim Ruehsen  added Brotli decompression
28
 *
29
 * References
30
 *   https://en.wikipedia.org/wiki/HTTP_compression
31
 *   https://wiki.mozilla.org/LZMA2_Compression
32
 *   https://groups.google.com/forum/#!topic/mozilla.dev.platform/CBhSPWs3HS8
33
 *   https://github.com/google/brotli
34
 */
35
36
#include <config.h>
37
38
#include <stdio.h>
39
#include <string.h>
40
41
#ifdef WITH_ZLIB
42
#define ZLIB_CONST
43
#include <zlib.h>
44
#endif
45
46
#ifdef WITH_BZIP2
47
#include <bzlib.h>
48
#endif
49
50
#ifdef WITH_LZMA
51
#include <lzma.h>
52
#endif
53
54
#ifdef WITH_BROTLIDEC
55
#include <brotli/decode.h>
56
#endif
57
58
#ifdef WITH_ZSTD
59
#include <zstd.h>
60
#endif
61
62
#ifdef WITH_LZIP
63
#include <stdint.h>
64
#include <lzlib.h>
65
#endif
66
67
#include <wget.h>
68
#include "private.h"
69
70
/*
 * Signature of a per-encoding decompression routine (stored in the
 * 'decompress' member of the decompressor). It receives the decompressor,
 * a chunk of input and its length; the implementations in this file
 * return 0 on success and -1 on failure.
 */
typedef int wget_decompressor_decompress_fn(wget_decompressor *dc, const char *src, size_t srclen);
71
/*
 * Signature of a per-encoding cleanup routine (stored in the 'exit' member
 * of the decompressor); it releases the codec state held by the decompressor.
 */
typedef void wget_decompressor_exit_fn(wget_decompressor *dc);
72
73
struct wget_decompressor_st {
74
#ifdef WITH_ZLIB
75
  z_stream
76
    z_strm;
77
#endif
78
#ifdef WITH_LZMA
79
  lzma_stream
80
    lzma_strm;
81
#endif
82
#ifdef WITH_BZIP2
83
  bz_stream
84
    bz_strm;
85
#endif
86
#ifdef WITH_BROTLIDEC
87
  BrotliDecoderState
88
    *brotli_strm;
89
#endif
90
#ifdef WITH_ZSTD
91
  ZSTD_DStream
92
    *zstd_strm;
93
#endif
94
#ifdef WITH_LZIP
95
  struct LZ_Decoder
96
    *lzip_strm;
97
#endif
98
99
  wget_decompressor_sink_fn
100
    *sink; // decompressed data goes here
101
  wget_decompressor_error_handler
102
    *error_handler; // called on error
103
  wget_decompressor_decompress_fn
104
    *decompress;
105
  wget_decompressor_exit_fn
106
    *exit;
107
  void
108
    *context; // given to sink()
109
  wget_content_encoding
110
    encoding;
111
  bool
112
    inflating; // deflate/gzip succeeded the init phase
113
};
114
115
#ifdef WITH_ZLIB
116
/*
 * Prepare a zlib inflate stream for gzip-encoded content.
 *
 * The stream is zeroed and then initialised so that zlib autodetects
 * whether the data carries a gzip or a zlib header.
 *
 * Returns 0 on success, -1 (after logging) if zlib rejects the stream.
 */
static int gzip_init(z_stream *strm)
{
  // +16 would decode the gzip format only,
  // +32 decodes gzip and zlib (autodetect)
  const int window_bits = 15 + 32;

  memset(strm, 0, sizeof(*strm));

  if (inflateInit2(strm, window_bits) == Z_OK)
    return 0;

  error_printf(_("Failed to init gzip decompression\n"));
  return -1;
}
129
130
/*
 * Inflate one chunk of gzip/zlib/deflate data and pass the result to the sink.
 *
 * dc      decompressor whose z_strm has been initialised
 * src     compressed input
 * srclen  number of bytes at src; 0 just forwards an empty buffer to the sink
 *
 * If the very first inflate() call fails with Z_DATA_ERROR, the stream is
 * re-initialised for raw deflate data (no header) and the same input is
 * processed again from the start.
 *
 * Returns 0 on success (Z_OK, Z_BUF_ERROR or Z_STREAM_END), -1 otherwise.
 */
static int gzip_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
  char outbuf[10240];
  z_stream *zs = &dc->z_strm;
  int status;

  if (srclen == 0) {
    // special case to avoid decompress errors
    if (dc->sink)
      dc->sink(dc->context, "", 0);

    return 0;
  }

restart:
  zs->next_in = (const unsigned char *) src;
  zs->avail_in = (unsigned int) srclen;

  do {
    zs->next_out = (unsigned char *) outbuf;
    zs->avail_out = sizeof(outbuf);

    status = inflate(zs, Z_SYNC_FLUSH);

    if (!dc->inflating && status == Z_DATA_ERROR) {
      // Looks like some servers send gzip/deflate streams without header.
      // Reported at https://gitlab.com/gnuwget/wget2/-/issues/532.
      inflateEnd(zs);
      if (inflateInit2(zs, -MAX_WBITS) != Z_OK) {
        error_printf(_("Failed to re-init deflate/gzip decompression\n"));
        return -1;
      }
      // setting the flag first guarantees the retry happens only once
      dc->inflating = true;
      goto restart;
    }
    dc->inflating = true;

    const size_t produced = sizeof(outbuf) - zs->avail_out;

    if (produced && dc->sink && (status == Z_OK || status == Z_STREAM_END))
      dc->sink(dc->context, outbuf, produced);

    // a completely filled output buffer means there may be more to fetch
  } while (status == Z_OK && zs->avail_out == 0);

  switch (status) {
  case Z_OK:
  case Z_BUF_ERROR:
  case Z_STREAM_END:
    return 0;
  default:
    error_printf(_("Failed to uncompress gzip stream (%d)\n"), status);
    return -1;
  }
}
178
179
/*
 * Release the zlib inflate state of the decompressor.
 * A failure reported by inflateEnd() is logged but otherwise ignored.
 */
static void gzip_exit(wget_decompressor *dc)
{
  const int status = inflateEnd(&dc->z_strm);

  if (status == Z_OK)
    return;

  error_printf(_("Failed to close gzip stream (%d)\n"), status);
}
187
188
/*
 * Prepare a zlib inflate stream for deflate-encoded content,
 * using zlib's default inflateInit() settings.
 *
 * Returns 0 on success, -1 (after logging) on failure.
 */
static int deflate_init(z_stream *strm)
{
  memset(strm, 0, sizeof(*strm));

  if (inflateInit(strm) == Z_OK)
    return 0;

  error_printf(_("Failed to init deflate decompression\n"));
  return -1;
}
199
#endif // WITH_ZLIB
200
201
#ifdef WITH_LZMA
202
/*
 * Prepare an liblzma stream using the auto-detecting decoder,
 * without a memory limit (UINT64_MAX) and without extra flags.
 *
 * Returns 0 on success, -1 (after logging) on failure.
 */
static int lzma_init(lzma_stream *strm)
{
  memset(strm, 0, sizeof(*strm));

  if (lzma_auto_decoder(strm, UINT64_MAX, 0) == LZMA_OK)
    return 0;

  error_printf(_("Failed to init LZMA decompression\n"));
  return -1;
}
214
215
/*
 * Decode one chunk of XZ/LZMA data and pass the result to the sink.
 *
 * dc      decompressor whose lzma_strm has been initialised
 * src     compressed input
 * srclen  number of bytes at src; 0 just forwards an empty buffer to the sink
 *
 * Returns 0 if the decoder ended with LZMA_OK or LZMA_STREAM_END,
 * -1 (after logging) otherwise.
 */
static int lzma_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
  char outbuf[10240];
  lzma_stream *ls = &dc->lzma_strm;
  int status;

  if (srclen == 0) {
    // special case to avoid decompress errors
    if (dc->sink)
      dc->sink(dc->context, "", 0);

    return 0;
  }

  ls->next_in = (const uint8_t *) src;
  ls->avail_in = srclen;

  do {
    ls->next_out = (unsigned char *) outbuf;
    ls->avail_out = sizeof(outbuf);

    status = lzma_code(ls, LZMA_RUN);

    const size_t produced = sizeof(outbuf) - ls->avail_out;

    if (produced && dc->sink && (status == LZMA_OK || status == LZMA_STREAM_END))
      dc->sink(dc->context, outbuf, produced);

    // a completely filled output buffer means there may be more to fetch
  } while (status == LZMA_OK && ls->avail_out == 0);

  switch (status) {
  case LZMA_OK:
  case LZMA_STREAM_END:
    return 0;
  default:
    error_printf(_("Failed to uncompress LZMA stream (%d)\n"), status);
    return -1;
  }
}
250
251
/*
 * Release the liblzma state of the decompressor.
 */
static void lzma_exit(wget_decompressor *dc)
{
  lzma_stream *ls = &dc->lzma_strm;

  lzma_end(ls);
}
255
#endif // WITH_LZMA
256
257
#ifdef WITH_BROTLIDEC
258
/*
 * Create a Brotli decoder instance with default allocators and store it in *strm.
 *
 * Returns 0 on success; on failure *strm is NULL, an error is logged
 * and -1 is returned.
 */
static int brotli_init(BrotliDecoderState **strm)
{
  BrotliDecoderState *decoder = BrotliDecoderCreateInstance(NULL, NULL, NULL);

  *strm = decoder;

  if (decoder)
    return 0;

  error_printf(_("Failed to init Brotli decompression\n"));
  return -1;
}
267
268
/*
 * Decode one chunk of Brotli data and pass the result to the sink.
 *
 * dc      decompressor whose brotli_strm has been created
 * src     compressed input
 * srclen  number of bytes at src; 0 just forwards an empty buffer to the sink
 *
 * The decoder is called repeatedly for as long as it asks for more output
 * space. Returns 0 if it finished or wants more input, -1 (after logging
 * the decoder's error string) otherwise.
 */
static int brotli_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
  uint8_t outbuf[10240];
  BrotliDecoderResult status;

  if (srclen == 0) {
    // special case to avoid decompress errors
    if (dc->sink)
      dc->sink(dc->context, "", 0);

    return 0;
  }

  BrotliDecoderState *decoder = dc->brotli_strm;
  const uint8_t *in_ptr = (const uint8_t *) src;
  size_t in_left = srclen;

  do {
    uint8_t *out_ptr = (unsigned char *) outbuf;
    size_t out_left = sizeof(outbuf);

    status = BrotliDecoderDecompressStream(decoder, &in_left, &in_ptr, &out_left, &out_ptr, NULL);

    // whatever was produced is forwarded, regardless of the result code
    const size_t produced = sizeof(outbuf) - out_left;

    if (produced && dc->sink)
      dc->sink(dc->context, (char *) outbuf, produced);
  } while (status == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT);

  switch (status) {
  case BROTLI_DECODER_RESULT_SUCCESS:
  case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
    return 0;
  default:
    break;
  }

  BrotliDecoderErrorCode err = BrotliDecoderGetErrorCode(decoder);
  error_printf(_("Failed to uncompress Brotli stream (%u): %s\n"), status, BrotliDecoderErrorString(err));

  return -1;
}
308
309
/*
 * Destroy the Brotli decoder instance held by the decompressor.
 */
static void brotli_exit(wget_decompressor *dc)
{
  BrotliDecoderState *decoder = dc->brotli_strm;

  BrotliDecoderDestroyInstance(decoder);
}
313
#endif // WITH_BROTLIDEC
314
315
#ifdef WITH_ZSTD
316
/*
 * Create and initialise a Zstandard decompression stream, stored in *strm.
 *
 * Returns 0 on success. On failure an error is logged, *strm is NULL
 * (a stream that was created but failed to initialise is freed again)
 * and -1 is returned.
 */
static int zstd_init(ZSTD_DStream **strm)
{
  ZSTD_DStream *stream = ZSTD_createDStream();

  *strm = stream;

  if (!stream) {
    error_printf(_("Failed to create Zstandard decompression\n"));
    return -1;
  }

  const size_t rc = ZSTD_initDStream(stream);

  if (!ZSTD_isError(rc))
    return 0;

  error_printf(_("Failed to init Zstandard decompression: %s\n"), ZSTD_getErrorName(rc));
  ZSTD_freeDStream(stream);
  *strm = NULL;
  return -1;
}
333
334
/*
 * Decode one chunk of Zstandard data and pass the result to the sink.
 *
 * dc      decompressor whose zstd_strm has been initialised
 * src     compressed input
 * srclen  number of bytes at src; 0 just forwards an empty buffer to the sink
 *
 * Returns 0 on success, -1 (after logging) if the decoder reports an error.
 */
static int zstd_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
  ZSTD_DStream *strm;
  uint8_t dst[10240];
  bool output_full;

  if (!srclen) {
    // special case to avoid decompress errors
    if (dc->sink)
      dc->sink(dc->context, "", 0);

    return 0;
  }

  strm = dc->zstd_strm;

  ZSTD_inBuffer input = { .src = src, .size = srclen, .pos = 0 };

  do {
    ZSTD_outBuffer output = { .dst = dst, .size = sizeof(dst), .pos = 0 };

    size_t rc = ZSTD_decompressStream(strm, &output, &input);
    if (ZSTD_isError(rc)) {
      error_printf(_("Failed to uncompress Zstandard stream: %s\n"), ZSTD_getErrorName(rc));
      return -1;
    }

    if (output.pos && dc->sink)
      dc->sink(dc->context, (char *)dst, output.pos);

    // A completely filled output buffer means the decoder may still hold
    // decoded data internally. Keep calling it even when all input has been
    // consumed, otherwise the tail of the last chunk would never be delivered.
    output_full = (output.pos == output.size);
  } while (input.pos < input.size || output_full);

  return 0;
}
366
367
/*
 * Free the Zstandard decompression stream held by the decompressor.
 */
static void zstd_exit(wget_decompressor *dc)
{
  ZSTD_DStream *stream = dc->zstd_strm;

  ZSTD_freeDStream(stream);
}
371
#endif // WITH_ZSTD
372
373
#ifdef WITH_LZIP
374
static int lzip_init(struct LZ_Decoder **strm)
375
{
376
  if ((*strm = LZ_decompress_open()) == NULL) {
377
    error_printf(_("Failed to create lzip decompression\n"));
378
    return -1;
379
  }
380
381
  // docs say, we have to check the pointer
382
  enum LZ_Errno err;
383
  if ((err = LZ_decompress_errno(*strm)) != LZ_ok) {
384
    error_printf(_("Failed to create lzip decompression: %d %s\n"), (int) err, LZ_strerror(err));
385
    LZ_decompress_close(*strm);
386
    return -1;
387
  }
388
389
  return 0;
390
}
391
392
/*
 * Read all currently available decompressed data from the lzip decoder
 * and pass it to the sink.
 *
 * Returns 0 if the decoder reports no error afterwards,
 * -1 (after logging) otherwise.
 */
static int lzip_drain(wget_decompressor *dc)
{
  struct LZ_Decoder *decoder = dc->lzip_strm;
  uint8_t outbuf[10240];

  for (;;) {
    const int got = LZ_decompress_read(decoder, outbuf, sizeof(outbuf));

    if (got <= 0)
      break;

    if (dc->sink)
      dc->sink(dc->context, (char *) outbuf, got);
  }

  const enum LZ_Errno err = LZ_decompress_errno(decoder);

  if (err == LZ_ok)
    return 0;

  error_printf(_("Failed to uncompress lzip stream: %d %s\n"), (int) err, LZ_strerror(err));
  return -1;
}
411
412
/*
 * Feed one chunk of lzip data to the decoder and pass the result to the sink.
 *
 * dc      decompressor whose lzip_strm has been created
 * src     compressed input
 * srclen  number of bytes at src; 0 just forwards an empty buffer to the sink
 *
 * Input is written in pieces, draining the decoder after every write, until
 * the decoder accepts no more bytes.
 *
 * Returns 0 on success, -1 if writing to or draining the decoder failed.
 */
static int lzip_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
  // LZ_decompress_write() takes an int size: largest value an int can hold
  // (assuming int has no padding bits)
  const size_t max_write = (size_t) (~0u >> 1);
  struct LZ_Decoder *strm;
  const uint8_t *next_in;
  size_t remaining;
  int wbytes;

  if (!srclen) {
    // special case to avoid decompress errors
    if (dc->sink)
      dc->sink(dc->context, "", 0);

    return 0;
  }

  strm = dc->lzip_strm;
  next_in = (const uint8_t *) src;
  remaining = srclen;

  do {
    // never hand a truncated or negative size to the decoder
    const int chunk = (int) (remaining > max_write ? max_write : remaining);

    wbytes = LZ_decompress_write(strm, next_in, chunk);

    // only advance on success; a negative return value is an error indicator
    if (wbytes > 0) {
      next_in += wbytes;
      remaining -= (size_t) wbytes;
    }

    if (lzip_drain(dc) < 0)
      return -1;

    if (wbytes < 0) {
      // the write failed although draining did not report an error
      enum LZ_Errno err = LZ_decompress_errno(strm);

      error_printf(_("Failed to uncompress lzip stream: %d %s\n"), (int) err, LZ_strerror(err));
      return -1;
    }
  } while (wbytes > 0);

  return 0;
}
442
443
/*
 * Finish the lzip stream, deliver any data still pending to the sink
 * and close the decoder.
 */
static void lzip_exit(wget_decompressor *dc)
{
  struct LZ_Decoder *decoder = dc->lzip_strm;
  const int finished = LZ_decompress_finish(decoder);

  // drain only if finishing succeeded; the result of the drain is ignored
  if (finished == 0)
    lzip_drain(dc);

  LZ_decompress_close(decoder);
}
452
#endif // WITH_LZIP
453
454
#ifdef WITH_BZIP2
455
/*
 * Prepare a bzip2 decompression stream (verbosity 0, 'small' mode off).
 *
 * Returns 0 on success, -1 (after logging) on failure.
 */
static int bzip2_init(bz_stream *strm)
{
  memset(strm, 0, sizeof(*strm));

  if (BZ2_bzDecompressInit(strm, 0, 0) == BZ_OK)
    return 0;

  error_printf(_("Failed to init bzip2 decompression\n"));
  return -1;
}
466
467
/*
 * Decode one chunk of bzip2 data and pass the result to the sink.
 *
 * dc      decompressor whose bz_strm has been initialised
 * src     compressed input
 * srclen  number of bytes at src; 0 just forwards an empty buffer to the sink
 *
 * Returns 0 if the decoder ended with BZ_OK or BZ_STREAM_END,
 * -1 (after logging) otherwise.
 */
static int bzip2_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
  char outbuf[10240];
  bz_stream *bz = &dc->bz_strm;
  int status;

  if (srclen == 0) {
    // special case to avoid decompress errors
    if (dc->sink)
      dc->sink(dc->context, "", 0);

    return 0;
  }

  // the bzip2 API takes a non-const input pointer
  bz->next_in = (char *) src;
  bz->avail_in = (unsigned int) srclen;

  do {
    bz->next_out = outbuf;
    bz->avail_out = sizeof(outbuf);

    status = BZ2_bzDecompress(bz);

    const size_t produced = sizeof(outbuf) - bz->avail_out;

    if (produced && dc->sink && (status == BZ_OK || status == BZ_STREAM_END))
      dc->sink(dc->context, outbuf, produced);

    // a completely filled output buffer means there may be more to fetch
  } while (status == BZ_OK && bz->avail_out == 0);

  switch (status) {
  case BZ_OK:
  case BZ_STREAM_END:
    return 0;
  default:
    error_printf(_("Failed to uncompress bzip2 stream (%d)\n"), status);
    return -1;
  }
}
502
503
/*
 * Release the bzip2 state of the decompressor.
 */
static void bzip2_exit(wget_decompressor *dc)
{
  bz_stream *bz = &dc->bz_strm;

  BZ2_bzDecompressEnd(bz);
}
507
#endif // WITH_BZIP2
508
509
/*
 * Pass-through "decompressor": hands the input unchanged to the sink,
 * if one is set. Always returns 0.
 */
static int identity(wget_decompressor *dc, const char *src, size_t srclen)
{
  wget_decompressor_sink_fn *deliver = dc->sink;

  if (deliver)
    deliver(dc->context, src, srclen);

  return 0;
}
516
517
wget_decompressor *wget_decompress_open(
518
  wget_content_encoding encoding,
519
  wget_decompressor_sink_fn *sink,
520
  void *context)
521
1.91k
{
522
1.91k
  int rc = 0;
523
1.91k
  wget_decompressor *dc = wget_calloc(1, sizeof(wget_decompressor));
524
525
1.91k
  if (!dc)
526
0
    return NULL;
527
528
1.91k
  if (encoding == wget_content_encoding_gzip) {
529
0
#ifdef WITH_ZLIB
530
0
    if ((rc = gzip_init(&dc->z_strm)) == 0) {
531
0
      dc->decompress = gzip_decompress;
532
0
      dc->exit = gzip_exit;
533
0
    }
534
0
#endif
535
1.91k
  } else if (encoding == wget_content_encoding_deflate) {
536
0
#ifdef WITH_ZLIB
537
0
    if ((rc = deflate_init(&dc->z_strm)) == 0) {
538
0
      dc->decompress = gzip_decompress;
539
0
      dc->exit = gzip_exit;
540
0
    }
541
0
#endif
542
1.91k
  } else if (encoding == wget_content_encoding_bzip2) {
543
#ifdef WITH_BZIP2
544
    if ((rc = bzip2_init(&dc->bz_strm)) == 0) {
545
      dc->decompress = bzip2_decompress;
546
      dc->exit = bzip2_exit;
547
    }
548
#endif
549
1.91k
  } else if (encoding == wget_content_encoding_lzma || encoding == wget_content_encoding_xz) {
550
#ifdef WITH_LZMA
551
    if ((rc = lzma_init(&dc->lzma_strm)) == 0) {
552
      dc->decompress = lzma_decompress;
553
      dc->exit = lzma_exit;
554
    }
555
#endif
556
1.91k
  } else if (encoding == wget_content_encoding_brotli) {
557
#ifdef WITH_BROTLIDEC
558
    if ((rc = brotli_init(&dc->brotli_strm)) == 0) {
559
      dc->decompress = brotli_decompress;
560
      dc->exit = brotli_exit;
561
    }
562
#endif
563
1.91k
  } else if (encoding == wget_content_encoding_zstd) {
564
#ifdef WITH_ZSTD
565
    if ((rc = zstd_init(&dc->zstd_strm)) == 0) {
566
      dc->decompress = zstd_decompress;
567
      dc->exit = zstd_exit;
568
    }
569
#endif
570
1.91k
  } else if (encoding == wget_content_encoding_lzip) {
571
#ifdef WITH_LZIP
572
    if ((rc = lzip_init(&dc->lzip_strm)) == 0) {
573
      dc->decompress = lzip_decompress;
574
      dc->exit = lzip_exit;
575
    }
576
#endif
577
0
  }
578
579
1.91k
  if (!dc->decompress) {
580
    // identity
581
1.91k
    if (encoding != wget_content_encoding_identity)
582
0
      debug_printf("Falling back to Content-Encoding 'identity'\n");
583
1.91k
    dc->decompress = identity;
584
1.91k
  }
585
586
1.91k
  if (rc) {
587
0
    xfree(dc);
588
0
    return NULL;
589
0
  }
590
591
1.91k
  dc->encoding = encoding;
592
1.91k
  dc->sink = sink;
593
1.91k
  dc->context = context;
594
1.91k
  return dc;
595
1.91k
}
596
597
/*
 * Release a decompressor: run the encoding-specific cleanup routine,
 * if there is one, and free the decompressor itself.
 * Passing NULL is allowed and does nothing.
 */
void wget_decompress_close(wget_decompressor *dc)
{
  if (!dc)
    return;

  wget_decompressor_exit_fn *cleanup = dc->exit;

  if (cleanup)
    cleanup(dc);

  xfree(dc);
}
605
606
/*
 * Feed a chunk of data to the decompressor.
 *
 * dc      decompressor (NULL is allowed and does nothing)
 * src     input data
 * srclen  number of bytes at src
 *
 * A non-zero result of the encoding-specific routine is reported through
 * the error handler, if one is set.
 *
 * NOTE: this function always returns 0; failures are only visible
 * through the error handler.
 */
int wget_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
  if (!dc)
    return 0;

  const int rc = dc->decompress(dc, src, srclen);

  if (rc != 0 && dc->error_handler)
    dc->error_handler(dc, rc);

  return 0;
}
617
618
/*
 * Install the callback that wget_decompress() invokes when decompression
 * fails. Passing a NULL decompressor is allowed and does nothing.
 */
void wget_decompress_set_error_handler(wget_decompressor *dc, wget_decompressor_error_handler *error_handler)
{
  if (!dc)
    return;

  dc->error_handler = error_handler;
}
623
624
/*
 * Return the context pointer stored in the decompressor,
 * or NULL if dc itself is NULL.
 */
void *wget_decompress_get_context(wget_decompressor *dc)
{
  if (!dc)
    return NULL;

  return dc->context;
}
628
629
static char _encoding_names[][9] = {
630
  [wget_content_encoding_identity] = "identity",
631
  [wget_content_encoding_gzip] = "gzip",
632
  [wget_content_encoding_deflate] = "deflate",
633
  [wget_content_encoding_xz] = "xz",
634
  [wget_content_encoding_lzma] = "lzma",
635
  [wget_content_encoding_bzip2] = "bzip2",
636
  [wget_content_encoding_brotli] = "br",
637
  [wget_content_encoding_zstd] = "zstd",
638
  [wget_content_encoding_lzip] = "lzip",
639
};
640
641
wget_content_encoding wget_content_encoding_by_name(const char *name)
642
1.43k
{
643
1.43k
  if (name) {
644
9.54k
    for (wget_content_encoding it = 0; it < wget_content_encoding_max; it++) {
645
8.69k
      if (!strcmp(_encoding_names[it], name))
646
581
        return it;
647
8.69k
    }
648
649
849
    if (!strcmp("none", name))
650
708
      return wget_content_encoding_identity;
651
849
  }
652
653
141
  return wget_content_encoding_unknown;
654
1.43k
}
655
656
/*
 * Map a wget_content_encoding value to its name.
 *
 * Returns a pointer into the static encoding name table, or NULL if
 * type is negative or not below wget_content_encoding_max.
 */
const char *wget_content_encoding_to_name(wget_content_encoding type)
{
  if (type < 0 || type >= wget_content_encoding_max)
    return NULL;

  return _encoding_names[type];
}