Coverage Report

Created: 2024-05-20 06:23

/src/mupdf/source/fitz/unlibarchive.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2023 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
25
#ifdef HAVE_LIBARCHIVE
26
27
#ifdef _WIN32
28
#include "libarchive/archive.h"
29
#include "libarchive/archive_entry.h"
30
#else
31
#include <archive.h>
32
#include <archive_entry.h>
33
#endif
34
35
/*
 * One indexed archive member.
 *
 * The indexing code in this file allocates each record as
 * sizeof(entry_t) - 32 + strlen(path) + 1 bytes, so 'name' acts as a
 * variable-length trailing string rather than a fixed 32 byte field.
 */
typedef struct
{
  /* Size of the entry's data as reported by archive_entry_size(). */
  size_t len;
  /* NUL-terminated path of the entry inside the archive. */
  uint8_t name[32];
} entry_t;
40
41
typedef struct
42
{
43
  fz_archive super;
44
45
  struct archive *archive;
46
47
  int current_entry_idx;
48
49
  int entries_max;
50
  int entries_len;
51
  entry_t **entries;
52
53
  fz_context *ctx; /* safe! */
54
  uint8_t block[4096];
55
} fz_libarchive_archive;
56
57
static la_ssize_t
58
libarchive_read(struct archive *a, void *client_data, const void **buf)
59
{
60
  fz_libarchive_archive *arch = (fz_libarchive_archive *)client_data;
61
  size_t z;
62
  uint8_t *p;
63
  size_t left;
64
  fz_context *ctx = arch->ctx;
65
  la_ssize_t ret = 0;
66
67
  fz_try(ctx)
68
  {
69
    z = fz_available(arch->ctx, arch->super.file, 1024);
70
71
    /* If we're at the EOF, can't read anything! */
72
    if (z == 0)
73
      break;
74
75
    /* If we have at least 1K, then just return the pointer to that
76
     * directly. */
77
    if (z >= 1024)
78
    {
79
      *buf = arch->super.file->rp;
80
      arch->super.file->rp += z;
81
      ret = (la_ssize_t)z;
82
      break;
83
    }
84
85
    /* If not, let's pull a large enough lump out. */
86
87
    left = sizeof(arch->block);
88
    p = arch->block;
89
    do
90
    {
91
      memcpy(p, arch->super.file->rp, z);
92
      p += z;
93
      arch->super.file->rp += z;
94
      left -= z;
95
      if (left)
96
      {
97
        z = fz_available(arch->ctx, arch->super.file, left);
98
        if (z > left)
99
          z = left;
100
        if (z == 0)
101
          break;
102
      }
103
    }
104
    while (left != 0);
105
106
    ret = p - arch->block;
107
    *buf = arch->block;
108
  }
109
  fz_catch(ctx)
110
  {
111
    /* Ignore error */
112
    archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
113
    return -1;
114
  }
115
116
  return ret;
117
}
118
119
static la_int64_t
120
libarchive_skip(struct archive *a, void *client_data, la_int64_t skip)
121
{
122
  fz_libarchive_archive *arch = (fz_libarchive_archive *)client_data;
123
  int64_t pos;
124
  fz_context *ctx = arch->ctx;
125
126
  fz_try(ctx)
127
  {
128
    pos = fz_tell(arch->ctx, arch->super.file);
129
    fz_seek(arch->ctx, arch->super.file, pos + skip, SEEK_SET);
130
    pos = fz_tell(arch->ctx, arch->super.file) - pos;
131
  }
132
  fz_catch(ctx)
133
  {
134
    /* Ignore error */
135
    archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
136
    return -1;
137
  }
138
139
  return pos;
140
}
141
142
static la_int64_t
143
libarchive_seek(struct archive *a, void *client_data, la_int64_t offset, int whence)
144
{
145
  fz_libarchive_archive *arch = (fz_libarchive_archive *)client_data;
146
  fz_context *ctx = arch->ctx;
147
  int64_t pos;
148
149
  fz_try(ctx)
150
  {
151
    fz_seek(arch->ctx, arch->super.file, offset, whence);
152
    pos = fz_tell(arch->ctx, arch->super.file);
153
  }
154
  fz_catch(ctx)
155
  {
156
    /* Ignore error */
157
    archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
158
    return -1;
159
  }
160
161
  return pos;
162
}
163
164
static int
165
libarchive_close(struct archive *a, void *client_data)
166
{
167
  /* Nothing to do. Stream is dropped when the fz_archive is closed. */
168
  return ARCHIVE_OK;
169
}
170
171
static int
172
libarchive_open(fz_context *ctx, fz_libarchive_archive *arch)
173
{
174
  int r;
175
176
  arch->archive = archive_read_new();
177
  archive_read_support_filter_all(arch->archive);
178
  archive_read_support_format_all(arch->archive);
179
180
  arch->ctx = ctx;
181
  r = archive_read_set_seek_callback(arch->archive, libarchive_seek);
182
  if (r == ARCHIVE_OK)
183
    r = archive_read_open2(arch->archive, arch, NULL, libarchive_read, libarchive_skip, libarchive_close);
184
  arch->ctx = NULL;
185
  if (r != ARCHIVE_OK)
186
  {
187
    archive_read_free(arch->archive);
188
    arch->archive = NULL;
189
  }
190
191
  return r != ARCHIVE_OK;
192
}
193
194
static void
195
libarchive_reset(fz_context *ctx, fz_libarchive_archive *arch)
196
{
197
  if (arch->archive)
198
  {
199
    archive_read_free(arch->archive);
200
    arch->archive = NULL;
201
  }
202
  fz_seek(ctx, arch->super.file, 0, SEEK_SET);
203
  if (libarchive_open(ctx, arch))
204
    fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to restart archive traversal!");
205
206
  arch->current_entry_idx = 0;
207
}
208
209
static void
210
drop_libarchive_archive(fz_context *ctx, fz_archive *arch_)
211
{
212
  fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
213
  int i;
214
215
  archive_read_free(arch->archive);
216
  for (i = 0; i < arch->entries_len; ++i)
217
    fz_free(ctx, arch->entries[i]);
218
  fz_free(ctx, arch->entries);
219
  arch->archive = NULL;
220
}
221
222
int
223
fz_is_libarchive_archive(fz_context *ctx, fz_stream *file)
224
{
225
  fz_libarchive_archive arch;
226
  struct archive_entry *entry;
227
  int ret;
228
229
  arch.super.file = file;
230
  fz_seek(ctx, file, 0, SEEK_SET);
231
232
  /* Annoyingly, libarchive can say "sure, I can open this" only to
233
   * then fail when we try to read from it. We therefore need to
234
   * try to read at least 1 entry out to be sure. */
235
  ret = libarchive_open(ctx, &arch);
236
  if (ret == ARCHIVE_OK)
237
  {
238
    fz_var(ret);
239
240
    fz_try(ctx)
241
    {
242
      arch.ctx = ctx; /* safe */
243
      ret = archive_read_next_header(arch.archive, &entry);
244
    }
245
    fz_catch(ctx)
246
    {
247
      archive_read_free(arch.archive);
248
      fz_rethrow(ctx);
249
    }
250
  }
251
252
  archive_read_free(arch.archive);
253
254
  /* Do NOT return true if we get ARCHIVE_EOF. We will fail to recognise empty
255
   * archives, but the alternative is false positives. */
256
  return ret == ARCHIVE_OK;
257
}
258
259
static int
260
lookup_archive_entry(fz_context *ctx, fz_libarchive_archive *arch, const char *name)
261
{
262
  int idx;
263
264
  for (idx = 0; idx < arch->entries_len; idx++)
265
  {
266
    if (!strcmp(name, (const char *)arch->entries[idx]->name))
267
      return idx;
268
  }
269
270
  return -1;
271
}
272
273
/* has_entry hook: non-zero if an entry called 'name' was indexed. */
static int has_libarchive_entry(fz_context *ctx, fz_archive *arch_, const char *name)
{
  int found_at = lookup_archive_entry(ctx, (fz_libarchive_archive *)arch_, name);

  return found_at >= 0;
}
278
279
/* list_entry hook: name of the idx'th indexed entry, or NULL if idx is out of range. */
static const char *list_libarchive_entry(fz_context *ctx, fz_archive *arch_, int idx)
{
  fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;

  if (idx >= 0 && idx < arch->entries_len)
    return (const char *)arch->entries[idx]->name;

  return NULL;
}
286
287
/* count_entries hook: number of entries recorded in the index. */
static int count_libarchive_entries(fz_context *ctx, fz_archive *arch_)
{
  const fz_libarchive_archive *self = (const fz_libarchive_archive *)arch_;

  return self->entries_len;
}
292
293
static fz_buffer *
294
read_libarchive_entry(fz_context *ctx, fz_archive *arch_, const char *name)
295
{
296
  fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
297
  fz_buffer *ubuf = NULL;
298
  int idx;
299
  struct archive_entry *entry;
300
  la_ssize_t ret;
301
  size_t size;
302
303
  idx = lookup_archive_entry(ctx, arch, name);
304
  if (idx < 0)
305
    return NULL;
306
307
  if (arch->current_entry_idx > idx)
308
    libarchive_reset(ctx, arch);
309
310
  fz_var(ubuf);
311
312
  arch->ctx = ctx;
313
  fz_try(ctx)
314
  {
315
    while (arch->current_entry_idx < idx)
316
    {
317
      int r = archive_read_next_header(arch->archive, &entry);
318
      if (r == ARCHIVE_OK)
319
        r = archive_read_data_skip(arch->archive);
320
      if (r != ARCHIVE_OK)
321
        fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to skip over archive entry");
322
      arch->current_entry_idx++;
323
    }
324
325
    /* This is the one we want. */
326
    if (archive_read_next_header(arch->archive, &entry) != ARCHIVE_OK)
327
      fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to read archive entry header");
328
329
    arch->current_entry_idx++;
330
    size = arch->entries[idx]->len;
331
    ubuf = fz_new_buffer(ctx, size);
332
    ubuf->len = size;
333
334
    ret = archive_read_data(arch->archive, ubuf->data, size);
335
    if (ret < 0)
336
      fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to read archive data");
337
    if ((size_t)ret != size)
338
      fz_warn(ctx, "Premature end of data reading archive entry data (%zu vs %zu)", (size_t)ubuf->len, (size_t)size);
339
  }
340
  fz_always(ctx)
341
    arch->ctx = NULL;
342
  fz_catch(ctx)
343
  {
344
    fz_drop_buffer(ctx, ubuf);
345
    fz_rethrow(ctx);
346
  }
347
348
  return ubuf;
349
}
350
351
static fz_stream *
352
open_libarchive_entry(fz_context *ctx, fz_archive *arch_, const char *name)
353
{
354
  fz_buffer *buf = read_libarchive_entry(ctx, arch_, name);
355
  fz_stream *stm = NULL;
356
357
  fz_try(ctx)
358
    stm = fz_open_buffer(ctx, buf);
359
  fz_always(ctx)
360
    fz_drop_buffer(ctx, buf);
361
  fz_catch(ctx)
362
    fz_rethrow(ctx);
363
364
  return stm;
365
}
366
367
fz_archive *
368
fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file)
369
{
370
  fz_libarchive_archive *arch = fz_new_derived_archive(ctx, file, fz_libarchive_archive);
371
  int r;
372
  int free_path = 0;
373
  const char *path = NULL;
374
375
  fz_seek(ctx, file, 0, SEEK_SET);
376
377
  if (libarchive_open(ctx, arch) != ARCHIVE_OK)
378
  {
379
    fz_drop_archive(ctx, &arch->super);
380
    fz_throw(ctx, FZ_ERROR_LIBRARY, "cannot recognize libarchive archive");
381
  }
382
383
  arch->super.format = "libarchive";
384
  arch->super.count_entries = count_libarchive_entries;
385
  arch->super.list_entry = list_libarchive_entry;
386
  arch->super.has_entry = has_libarchive_entry;
387
  arch->super.read_entry = read_libarchive_entry;
388
  arch->super.open_entry = open_libarchive_entry;
389
  arch->super.drop_archive = drop_libarchive_archive;
390
391
  fz_var(free_path);
392
  fz_var(path);
393
394
  fz_try(ctx)
395
  {
396
    arch->ctx = ctx;
397
    /* Count the archive entries */
398
    do
399
    {
400
      struct archive_entry *entry;
401
      size_t z;
402
403
      r = archive_read_next_header(arch->archive, &entry);
404
      if (r == ARCHIVE_EOF)
405
        break;
406
407
      if (r != ARCHIVE_OK)
408
        fz_throw(ctx, FZ_ERROR_LIBRARY, "Corrupt archive");
409
410
      free_path = 0;
411
      path = archive_entry_pathname_utf8(entry);
412
      if (!path)
413
      {
414
        path = fz_utf8_from_wchar(ctx, archive_entry_pathname_w(entry));
415
        free_path = 1;
416
      }
417
      if (!path)
418
        continue;
419
420
      if (arch->entries_len == arch->entries_max)
421
      {
422
        int new_max = arch->entries_max * 2;
423
        if (new_max == 0)
424
          new_max = 32;
425
426
        arch->entries = fz_realloc(ctx, arch->entries, sizeof(arch->entries[0]) * new_max);
427
        arch->entries_max = new_max;
428
      }
429
430
      z = strlen(path);
431
      arch->entries[arch->entries_len] = fz_malloc(ctx, sizeof(entry_t) - 32 + z + 1);
432
      memcpy(&arch->entries[arch->entries_len]->name[0], path, z+1);
433
      if (free_path)
434
      {
435
        fz_free(ctx, path);
436
        free_path = 0;
437
      }
438
      arch->entries[arch->entries_len]->len = archive_entry_size(entry);
439
440
      arch->entries_len++;
441
    }
442
    while (r != ARCHIVE_EOF && r != ARCHIVE_FATAL);
443
444
    libarchive_reset(ctx, arch);
445
  }
446
  fz_always(ctx)
447
  {
448
    if (free_path)
449
      fz_free(ctx, path);
450
  }
451
  fz_catch(ctx)
452
  {
453
    arch->ctx = NULL;
454
    fz_drop_archive(ctx, &arch->super);
455
    fz_rethrow(ctx);
456
  }
457
458
  return &arch->super;
459
}
460
461
fz_archive *
462
fz_open_libarchive_archive(fz_context *ctx, const char *filename)
463
{
464
  fz_archive *tar = NULL;
465
  fz_stream *file;
466
467
  file = fz_open_file(ctx, filename);
468
469
  fz_try(ctx)
470
    tar = fz_open_libarchive_archive_with_stream(ctx, file);
471
  fz_always(ctx)
472
    fz_drop_stream(ctx, file);
473
  fz_catch(ctx)
474
    fz_rethrow(ctx);
475
476
  return tar;
477
}
478
479
480
/* Universal decompression stream */
481
482
typedef struct
483
{
484
  fz_stream *chain;
485
  fz_context *ctx; /* Safe as not persistent. */
486
  struct archive *archive;
487
  struct archive_entry *entry;
488
  uint8_t block[4096];
489
} fz_libarchived_state;
490
491
static la_ssize_t
492
libarchived_read(struct archive *a, void *client_data, const void **buf)
493
{
494
  fz_libarchived_state *state = (fz_libarchived_state *)client_data;
495
  size_t z;
496
  uint8_t *p;
497
  size_t left;
498
  fz_context *ctx = state->ctx;
499
  la_ssize_t ret = 0;
500
501
  fz_try(ctx)
502
  {
503
    z = fz_available(ctx, state->chain, 1024);
504
505
    /* If we're at the EOF, can't read anything! */
506
    if (z == 0)
507
      break;
508
509
    /* If we have at least 1K, then just return the pointer to that
510
     * directly. */
511
    if (z >= 1024)
512
    {
513
      *buf = state->chain->rp;
514
      state->chain->rp += z;
515
      ret = (la_ssize_t)z;
516
      break;
517
    }
518
519
    /* If not, let's pull a large enough lump out. */
520
521
    left = sizeof(state->block);
522
    p = state->block;
523
    do
524
    {
525
      memcpy(p, state->chain->rp, z);
526
      p += z;
527
      state->chain->rp += z;
528
      left -= z;
529
      if (left)
530
      {
531
        z = fz_available(ctx, state->chain, left);
532
        if (z > left)
533
          z = left;
534
        if (z == 0)
535
          break;
536
      }
537
    }
538
    while (left != 0);
539
540
    ret = p - state->block;
541
    *buf = state->block;
542
  }
543
  fz_catch(ctx)
544
  {
545
    /* Ignore error */
546
    archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
547
    return -1;
548
  }
549
550
  return ret;
551
}
552
553
static la_int64_t
554
libarchived_skip(struct archive *a, void *client_data, la_int64_t skip)
555
{
556
  fz_libarchived_state *state = (fz_libarchived_state *)client_data;
557
  int64_t pos;
558
  fz_context *ctx = state->ctx;
559
560
  fz_try(ctx)
561
  {
562
    pos = fz_tell(state->ctx, state->chain);
563
    fz_seek(state->ctx, state->chain, pos + skip, SEEK_SET);
564
    pos = fz_tell(state->ctx, state->chain) - pos;
565
  }
566
  fz_catch(ctx)
567
  {
568
    /* Ignore error */
569
    archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
570
    return -1;
571
  }
572
573
  return pos;
574
}
575
576
static la_int64_t
577
libarchived_seek(struct archive *a, void *client_data, la_int64_t offset, int whence)
578
{
579
  fz_libarchived_state *state = (fz_libarchived_state *)client_data;
580
  fz_context *ctx = state->ctx;
581
  int64_t pos;
582
583
  fz_try(ctx)
584
  {
585
    fz_seek(ctx, state->chain, offset, whence);
586
    pos = fz_tell(ctx, state->chain);
587
  }
588
  fz_catch(ctx)
589
  {
590
    /* Ignore error */
591
    archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
592
    return -1;
593
  }
594
595
  return pos;
596
}
597
598
static int
599
libarchived_close(struct archive *a, void *client_data)
600
{
601
  /* Nothing to do. Stream is dropped when the fz_stream is dropped. */
602
  return ARCHIVE_OK;
603
}
604
605
static int
606
next_libarchived(fz_context *ctx, fz_stream *stm, size_t required)
607
{
608
  fz_libarchived_state *state = stm->state;
609
  la_ssize_t z;
610
611
  if (stm->eof)
612
    return EOF;
613
614
  z = archive_read_data(state->archive, state->block, sizeof(state->block));
615
  if (z < 0)
616
    fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to read compressed data");
617
  if (z == 0)
618
  {
619
    stm->eof = 1;
620
    return EOF;
621
  }
622
623
  stm->rp = state->block;
624
  stm->wp = state->block + z;
625
626
  return *stm->rp++;
627
}
628
629
static void
630
close_libarchived(fz_context *ctx, void *state_)
631
{
632
  fz_libarchived_state *state = (fz_libarchived_state *)state_;
633
  int code;
634
635
  state->ctx = ctx;
636
  code = archive_read_free(state->archive);
637
  state->ctx = NULL;
638
  if (code != ARCHIVE_OK)
639
    fz_warn(ctx, "libarchive error: archive_read_free: %d", code);
640
641
  fz_drop_stream(ctx, state->chain);
642
  fz_free(ctx, state);
643
}
644
645
fz_stream *
646
fz_open_libarchived(fz_context *ctx, fz_stream *chain)
647
{
648
  fz_libarchived_state *state;
649
  int r;
650
651
  state = fz_malloc_struct(ctx, fz_libarchived_state);
652
653
  state->chain = fz_keep_stream(ctx, chain);
654
  state->archive = archive_read_new();
655
  archive_read_support_filter_all(state->archive);
656
  archive_read_support_format_raw(state->archive);
657
658
  state->ctx = ctx;
659
  r = archive_read_set_seek_callback(state->archive, libarchived_seek);
660
  if (r == ARCHIVE_OK)
661
    r = archive_read_open2(state->archive, state, NULL, libarchived_read, libarchived_skip, libarchived_close);
662
  if (r != ARCHIVE_OK)
663
  {
664
    archive_read_free(state->archive);
665
    state->ctx = NULL;
666
    fz_drop_stream(ctx, state->chain);
667
    fz_free(ctx, state);
668
    fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to open archive");
669
  }
670
671
  r = archive_filter_code(state->archive, 0);
672
  if (r == ARCHIVE_FILTER_NONE)
673
  {
674
    archive_read_free(state->archive);
675
    state->ctx = NULL;
676
    fz_drop_stream(ctx, state->chain);
677
    fz_free(ctx, state);
678
    fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to open archive");
679
  }
680
681
  /* This is the one we want. */
682
  r = archive_read_next_header(state->archive, &state->entry);
683
  if (r != ARCHIVE_OK)
684
  {
685
    archive_read_free(state->archive);
686
    state->ctx = NULL;
687
    fz_drop_stream(ctx, state->chain);
688
    fz_free(ctx, state);
689
    fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to open archive");
690
  }
691
692
  return fz_new_stream(ctx, state, next_libarchived, close_libarchived);
693
}
694
695
#else
696
697
int
698
fz_is_libarchive_archive(fz_context *ctx, fz_stream *file)
699
0
{
700
0
  static int warned = 0;
701
702
0
  if (!warned)
703
0
  {
704
0
    warned = 1;
705
0
    fz_warn(ctx, "libarchive support not included");
706
0
  }
707
708
0
  return 0;
709
0
}
710
711
fz_archive *
712
fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file)
713
0
{
714
0
  fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "libarchive support not included");
715
716
0
  return NULL;
717
0
}
718
719
fz_archive *
720
fz_open_libarchive_archive(fz_context *ctx, const char *filename)
721
0
{
722
0
  fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "libarchive support not included");
723
724
0
  return NULL;
725
0
}
726
727
fz_stream *
728
fz_open_libarchived(fz_context *ctx, fz_stream *chain)
729
0
{
730
0
  fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "libarchive support not included");
731
732
0
  return NULL;
733
0
}
734
735
#endif