Coverage Report

Created: 2024-07-05 06:13

/src/mupdf/source/fitz/archive.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2024 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
25
#include <string.h>
26
27
/* Capacity of the handler table in fz_archive_handler_context;
 * fz_register_archive_handler refuses to register beyond this many. */
enum
28
{
29
  FZ_ARCHIVE_HANDLER_MAX = 32
30
};
31
32
/* Per-context registry of archive format handlers. It is stored in
 * ctx->archive and consulted by fz_try_open_archive_with_stream. */
struct fz_archive_handler_context
33
{
34
  int refs; /* reference count, passed to fz_keep_imp/fz_drop_imp */
35
  int count; /* number of slots of handler[] currently in use */
36
  const fz_archive_handler *handler[FZ_ARCHIVE_HANDLER_MAX]; /* handlers in registration order */
37
};
38
39
fz_stream *
40
fz_open_archive_entry(fz_context *ctx, fz_archive *arch, const char *name)
41
0
{
42
0
  fz_stream *stream = fz_try_open_archive_entry(ctx, arch, name);
43
44
0
  if (stream == NULL)
45
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find entry %s", name);
46
47
0
  return stream;
48
0
}
49
50
fz_stream *
51
fz_try_open_archive_entry(fz_context *ctx, fz_archive *arch, const char *name)
52
0
{
53
0
  char *local_name;
54
0
  fz_stream *stream = NULL;
55
56
0
  if (arch == NULL || !arch->open_entry)
57
0
    return NULL;
58
59
0
  local_name = fz_cleanname_strdup(ctx, name);
60
61
0
  fz_var(stream);
62
63
0
  fz_try(ctx)
64
0
    stream = arch->open_entry(ctx, arch, local_name);
65
0
  fz_always(ctx)
66
0
    fz_free(ctx, local_name);
67
0
  fz_catch(ctx)
68
0
    fz_rethrow(ctx);
69
70
0
  return stream;
71
0
}
72
73
fz_buffer *
74
fz_read_archive_entry(fz_context *ctx, fz_archive *arch, const char *name)
75
82
{
76
82
  fz_buffer *buf = fz_try_read_archive_entry(ctx, arch, name);
77
78
82
  if (buf == NULL)
79
1
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find entry %s", name);
80
81
81
  return buf;
82
82
}
83
84
fz_buffer *
85
fz_try_read_archive_entry(fz_context *ctx, fz_archive *arch, const char *name)
86
263
{
87
263
  char *local_name;
88
263
  fz_buffer *buf = NULL;
89
90
263
  if (arch == NULL || !arch->read_entry || !arch->has_entry || name == NULL)
91
0
    return NULL;
92
93
263
  local_name = fz_cleanname_strdup(ctx, name);
94
95
263
  fz_var(buf);
96
97
526
  fz_try(ctx)
98
526
  {
99
263
    if (!arch->has_entry(ctx, arch, local_name))
100
112
      break;
101
151
    buf = arch->read_entry(ctx, arch, local_name);
102
151
  }
103
526
  fz_always(ctx)
104
263
    fz_free(ctx, local_name);
105
263
  fz_catch(ctx)
106
22
    fz_rethrow(ctx);
107
108
241
  return buf;
109
263
}
110
111
int
112
fz_has_archive_entry(fz_context *ctx, fz_archive *arch, const char *name)
113
160
{
114
160
  char *local_name;
115
160
  int res = 0;
116
117
160
  if (arch == NULL)
118
0
    return 0;
119
160
  if (!arch->has_entry)
120
0
    return 0;
121
122
160
  local_name = fz_cleanname_strdup(ctx, name);
123
124
160
  fz_var(res);
125
126
320
  fz_try(ctx)
127
320
    res = arch->has_entry(ctx, arch, local_name);
128
320
  fz_always(ctx)
129
160
    fz_free(ctx, local_name);
130
160
  fz_catch(ctx)
131
0
    fz_rethrow(ctx);
132
133
160
  return res;
134
160
}
135
136
const char *
137
fz_list_archive_entry(fz_context *ctx, fz_archive *arch, int idx)
138
641
{
139
641
  if (arch == 0)
140
0
    return NULL;
141
641
  if (!arch->list_entry)
142
0
    return NULL;
143
144
641
  return arch->list_entry(ctx, arch, idx);
145
641
}
146
147
int
148
fz_count_archive_entries(fz_context *ctx, fz_archive *arch)
149
51
{
150
51
  if (arch == NULL)
151
0
    return 0;
152
51
  if (!arch->count_entries)
153
0
    return 0;
154
51
  return arch->count_entries(ctx, arch);
155
51
}
156
157
const char *
158
fz_archive_format(fz_context *ctx, fz_archive *arch)
159
0
{
160
0
  if (arch == NULL)
161
0
    return "undefined";
162
0
  return arch->format;
163
0
}
164
165
fz_archive *
166
fz_new_archive_of_size(fz_context *ctx, fz_stream *file, int size)
167
642
{
168
642
  fz_archive *arch;
169
642
  arch = Memento_label(fz_calloc(ctx, 1, size), "fz_archive");
170
642
  arch->refs = 1;
171
642
  arch->file = fz_keep_stream(ctx, file);
172
642
  return arch;
173
642
}
174
175
fz_archive *
176
fz_try_open_archive_with_stream(fz_context *ctx, fz_stream *file)
177
35.3k
{
178
35.3k
  fz_archive *arch = NULL;
179
35.3k
  int i;
180
181
35.3k
  if (file == NULL)
182
0
    return NULL;
183
184
140k
  for (i = 0; i < ctx->archive->count; i++)
185
105k
  {
186
105k
    fz_seek(ctx, file, 0, SEEK_SET);
187
105k
    if (ctx->archive->handler[i]->recognize(ctx, file))
188
642
    {
189
642
      arch = ctx->archive->handler[i]->open(ctx, file);
190
642
      if (arch)
191
238
        return arch;
192
642
    }
193
105k
  }
194
195
35.1k
  return NULL;
196
35.3k
}
197
198
fz_archive *
199
fz_open_archive_with_stream(fz_context *ctx, fz_stream *file)
200
26
{
201
26
  fz_archive *arch = fz_try_open_archive_with_stream(ctx, file);
202
26
  if (arch == NULL)
203
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize archive");
204
26
  return arch;
205
26
}
206
207
fz_archive *
208
fz_open_archive(fz_context *ctx, const char *filename)
209
0
{
210
0
  fz_stream *file;
211
0
  fz_archive *arch = NULL;
212
213
0
  file = fz_open_file(ctx, filename);
214
215
0
  fz_try(ctx)
216
0
    arch = fz_open_archive_with_stream(ctx, file);
217
0
  fz_always(ctx)
218
0
    fz_drop_stream(ctx, file);
219
0
  fz_catch(ctx)
220
0
    fz_rethrow(ctx);
221
222
0
  return arch;
223
0
}
224
225
fz_archive *
226
fz_keep_archive(fz_context *ctx, fz_archive *arch)
227
98
{
228
98
  return (fz_archive *)fz_keep_imp(ctx, arch, &arch->refs);
229
98
}
230
231
void
232
fz_drop_archive(fz_context *ctx, fz_archive *arch)
233
35.8k
{
234
35.8k
  if (fz_drop_imp(ctx, arch, &arch->refs))
235
642
  {
236
642
    if (arch->drop_archive)
237
642
      arch->drop_archive(ctx, arch);
238
642
    fz_drop_stream(ctx, arch->file);
239
642
    fz_free(ctx, arch);
240
642
  }
241
35.8k
}
242
243
/* In-memory archive using a fz_tree holding fz_buffers */
244
245
/* Archive whose entries live in an fz_tree keyed by entry name; the
 * tree values are fz_buffer pointers (see drop_tree_archive_entry). */
typedef struct
246
{
247
  fz_archive super; /* base archive; must come first so casts from fz_archive* work */
248
  fz_tree *tree; /* name -> fz_buffer map; replaced by the result of each fz_tree_insert */
249
} fz_tree_archive;
250
251
/* has_entry callback for tree archives: 1 when the tree lookup for
 * `name` yields a non-NULL value, 0 otherwise. */
static int has_tree_entry(fz_context *ctx, fz_archive *arch, const char *name)
{
  fz_tree_archive *self = (fz_tree_archive *)arch;

  return fz_tree_lookup(ctx, self->tree, name) != NULL;
}
257
258
/* read_entry callback for tree archives: looks `name` up in the tree
 * and returns the result of fz_keep_buffer on the stored buffer. */
static fz_buffer *read_tree_entry(fz_context *ctx, fz_archive *arch, const char *name)
{
  fz_tree_archive *self = (fz_tree_archive *)arch;
  fz_buffer *stored = fz_tree_lookup(ctx, self->tree, name);

  return fz_keep_buffer(ctx, stored);
}
264
265
/* open_entry callback for tree archives: looks `name` up in the tree
 * and returns the result of fz_open_buffer on the stored buffer. */
static fz_stream *open_tree_entry(fz_context *ctx, fz_archive *arch, const char *name)
{
  fz_tree_archive *self = (fz_tree_archive *)arch;
  fz_buffer *stored = fz_tree_lookup(ctx, self->tree, name);

  return fz_open_buffer(ctx, stored);
}
271
272
/* Value destructor handed to fz_drop_tree: each tree value is an
 * fz_buffer, so release it with fz_drop_buffer. */
static void drop_tree_archive_entry(fz_context *ctx, void *ent)
{
  fz_buffer *stored = ent;

  fz_drop_buffer(ctx, stored);
}
276
277
/* drop_archive callback for tree archives: drops the tree, releasing
 * every stored buffer through drop_tree_archive_entry. */
static void drop_tree_archive(fz_context *ctx, fz_archive *arch)
{
  fz_tree_archive *self = (fz_tree_archive *)arch;

  fz_drop_tree(ctx, self->tree, drop_tree_archive_entry);
}
282
283
fz_archive *
284
fz_new_tree_archive(fz_context *ctx, fz_tree *tree)
285
0
{
286
0
  fz_tree_archive *arch;
287
288
0
  arch = fz_new_derived_archive(ctx, NULL, fz_tree_archive);
289
0
  arch->super.format = "tree";
290
0
  arch->super.has_entry = has_tree_entry;
291
0
  arch->super.read_entry = read_tree_entry;
292
0
  arch->super.open_entry = open_tree_entry;
293
0
  arch->super.drop_archive = drop_tree_archive;
294
0
  arch->tree = tree;
295
296
0
  return &arch->super;
297
0
}
298
299
void
300
fz_tree_archive_add_buffer(fz_context *ctx, fz_archive *arch_, const char *name, fz_buffer *buf)
301
0
{
302
0
  fz_tree_archive *arch = (fz_tree_archive *)arch_;
303
304
0
  if (arch == NULL || arch->super.has_entry != has_tree_entry)
305
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot insert into a non-tree archive");
306
307
0
  buf = fz_keep_buffer(ctx, buf);
308
309
0
  fz_try(ctx)
310
0
    arch->tree = fz_tree_insert(ctx, arch->tree, name, buf);
311
0
  fz_catch(ctx)
312
0
  {
313
0
    fz_drop_buffer(ctx, buf);
314
0
    fz_rethrow(ctx);
315
0
  }
316
0
}
317
318
void
319
fz_tree_archive_add_data(fz_context *ctx, fz_archive *arch_, const char *name, const void *data, size_t size)
320
0
{
321
0
  fz_tree_archive *arch = (fz_tree_archive *)arch_;
322
0
  fz_buffer *buf;
323
324
0
  if (arch == NULL || arch->super.has_entry != has_tree_entry)
325
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot insert into a non-tree archive");
326
327
0
  buf = fz_new_buffer_from_copied_data(ctx, data, size);
328
329
0
  fz_try(ctx)
330
0
    arch->tree = fz_tree_insert(ctx, arch->tree, name, buf);
331
0
  fz_catch(ctx)
332
0
  {
333
0
    fz_drop_buffer(ctx, buf);
334
0
    fz_rethrow(ctx);
335
0
  }
336
0
}
337
338
/* One mounted sub-archive inside an fz_multi_archive. */
typedef struct
340
{
341
  fz_archive *arch; /* reference taken with fz_keep_archive at mount time */
342
  char *dir; /* heap-allocated mount prefix ending in '/', or NULL for no prefix */
343
} multi_archive_entry;
343
344
/* Archive that forwards lookups to a growable list of mounted
 * sub-archives; the most recently mounted entry is searched first. */
typedef struct
345
{
346
  fz_archive super; /* base archive; must come first so casts from fz_archive* work */
347
  int len; /* number of entries of sub[] in use */
348
  int max; /* allocated capacity of sub[] */
349
  multi_archive_entry *sub; /* mounted sub-archives; NULL until the first mount */
350
} fz_multi_archive;
351
352
/*
 * has_entry callback for multi archives.
 *
 * Walks the mounts from newest to oldest. A mount with a directory
 * prefix is only consulted when `name` starts with that prefix, and is
 * then asked about the remainder of the name; a mount without a prefix
 * is asked about the full name. Returns 1 on the first hit, else 0.
 */
static int has_multi_entry(fz_context *ctx, fz_archive *arch_, const char *name)
{
  fz_multi_archive *multi = (fz_multi_archive *)arch_;
  int idx = multi->len;

  while (idx-- > 0)
  {
    multi_archive_entry *mount = &multi->sub[idx];
    const char *rest = name;

    if (mount->dir)
    {
      size_t prefix_len = strlen(mount->dir);

      if (strncmp(mount->dir, name, prefix_len) != 0)
        continue;
      rest = name + prefix_len;
    }

    if (fz_has_archive_entry(ctx, mount->arch, rest))
      return 1;
  }

  return 0;
}
373
374
/*
 * read_entry callback for multi archives.
 *
 * Walks the mounts from newest to oldest. A mount with a directory
 * prefix is only consulted when `name` starts with that prefix, and is
 * then asked for the remainder of the name. Returns the first buffer a
 * mount produces, or NULL when none does.
 */
static fz_buffer *read_multi_entry(fz_context *ctx, fz_archive *arch_, const char *name)
{
  fz_multi_archive *multi = (fz_multi_archive *)arch_;
  int idx = multi->len;

  while (idx-- > 0)
  {
    multi_archive_entry *mount = &multi->sub[idx];
    const char *rest = name;
    fz_buffer *found;

    if (mount->dir)
    {
      size_t prefix_len = strlen(mount->dir);

      if (strncmp(mount->dir, name, prefix_len) != 0)
        continue;
      rest = name + prefix_len;
    }

    found = fz_try_read_archive_entry(ctx, mount->arch, rest);
    if (found)
      return found;
  }

  return NULL;
}
401
402
/*
 * open_entry callback for multi archives.
 *
 * Walks the mounts from newest to oldest. A mount with a directory
 * prefix is only consulted when `name` starts with that prefix, and is
 * then asked for the remainder of the name. Returns the first stream a
 * mount produces, or NULL when none does.
 *
 * The non-throwing fz_try_open_archive_entry is used so that a mount
 * which matches the prefix but lacks the entry does not abort the
 * search; older mounts still get a chance, exactly as in
 * read_multi_entry. The "cannot find entry" error is left to the public
 * fz_open_archive_entry wrapper, which throws when NULL comes back.
 */
static fz_stream *open_multi_entry(fz_context *ctx, fz_archive *arch_, const char *name)
{
  fz_multi_archive *arch = (fz_multi_archive *)arch_;
  int i;
  fz_stream *res = NULL;

  for (i = arch->len-1; i >= 0; i--)
  {
    multi_archive_entry *e = &arch->sub[i];
    const char *subname = name;

    if (e->dir)
    {
      size_t n = strlen(e->dir);
      if (strncmp(e->dir, name, n) != 0)
        continue;
      subname += n;
    }

    res = fz_try_open_archive_entry(ctx, e->arch, subname);

    if (res)
      break;
  }

  return res;
}
429
430
/* drop_archive callback for multi archives: releases every mount
 * (newest first) - its prefix string and its archive reference - and
 * then the mount table itself. */
static void drop_multi_archive(fz_context *ctx, fz_archive *arch_)
{
  fz_multi_archive *multi = (fz_multi_archive *)arch_;
  int idx = multi->len;

  while (idx-- > 0)
  {
    multi_archive_entry *mount = &multi->sub[idx];

    fz_free(ctx, mount->dir);
    fz_drop_archive(ctx, mount->arch);
  }

  fz_free(ctx, multi->sub);
}
443
444
fz_archive *
445
fz_new_multi_archive(fz_context *ctx)
446
0
{
447
0
  fz_multi_archive *arch;
448
449
0
  arch = fz_new_derived_archive(ctx, NULL, fz_multi_archive);
450
0
  arch->super.format = "multi";
451
0
  arch->super.has_entry = has_multi_entry;
452
0
  arch->super.read_entry = read_multi_entry;
453
0
  arch->super.open_entry = open_multi_entry;
454
0
  arch->super.drop_archive = drop_multi_archive;
455
0
  arch->max = 0;
456
0
  arch->len = 0;
457
0
  arch->sub = NULL;
458
459
0
  return &arch->super;
460
0
}
461
462
void
463
fz_mount_multi_archive(fz_context *ctx, fz_archive *arch_, fz_archive *sub, const char *path)
464
0
{
465
0
  fz_multi_archive *arch = (fz_multi_archive *)arch_;
466
0
  char *clean_path = NULL;
467
468
0
  if (arch->super.has_entry != has_multi_entry)
469
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot mount within a non-multi archive");
470
471
0
  if (arch->len == arch->max)
472
0
  {
473
0
    int n = arch->max ? arch->max * 2 : 8;
474
475
0
    arch->sub = fz_realloc(ctx, arch->sub, sizeof(*arch->sub) * n);
476
0
    arch->max = n;
477
0
  }
478
479
  /* If we have a path, then strip any trailing slashes, and add just one. */
480
0
  if (path)
481
0
  {
482
0
    clean_path = fz_cleanname_strdup(ctx, path);
483
0
    if (clean_path[0] == '.' && clean_path[1] == 0)
484
0
    {
485
0
      fz_free(ctx, clean_path);
486
0
      clean_path = NULL;
487
0
    }
488
0
    else
489
0
    {
490
      /* Do a strcat without doing a strcat to avoid the compiler
491
       * complaining at us. We know that n here will be <= n above
492
       * so this is safe. */
493
0
      size_t n = strlen(clean_path);
494
0
      clean_path[n] = '/';
495
0
      clean_path[n + 1] = 0;
496
0
    }
497
0
  }
498
499
0
  arch->sub[arch->len].arch = fz_keep_archive(ctx, sub);
500
0
  arch->sub[arch->len].dir = clean_path;
501
0
  arch->len++;
502
0
}
503
504
/* Handler for zip archives; registered first by
 * fz_new_archive_handler_context. The two members are presumably the
 * recognize and open callbacks, in that order - confirm against the
 * fz_archive_handler declaration. */
static const fz_archive_handler fz_zip_archive_handler =
505
{
506
  fz_is_zip_archive,
507
  fz_open_zip_archive_with_stream
508
};
509
510
/* Handler for tar archives; registered second by
 * fz_new_archive_handler_context. Members are presumably the recognize
 * and open callbacks, in that order - confirm against the
 * fz_archive_handler declaration. */
static const fz_archive_handler fz_tar_archive_handler =
511
{
512
  fz_is_tar_archive,
513
  fz_open_tar_archive_with_stream
514
};
515
516
/* Handler backed by libarchive. Unlike the zip and tar handlers it has
 * external linkage; it is only registered by
 * fz_new_archive_handler_context when HAVE_LIBARCHIVE is defined. */
const fz_archive_handler fz_libarchive_archive_handler =
517
{
518
  fz_is_libarchive_archive,
519
  fz_open_libarchive_archive_with_stream
520
};
521
522
/* Handler for CFB archives. It has external linkage and is registered
 * last by fz_new_archive_handler_context. */
const fz_archive_handler fz_cfb_archive_handler =
523
{
524
  fz_is_cfb_archive,
525
  fz_open_cfb_archive_with_stream
526
};
527
528
/*
 * Create the context's archive handler registry and register the
 * built-in handlers in probe order: zip, tar, libarchive (only when
 * HAVE_LIBARCHIVE is defined), then cfb.
 */
void fz_new_archive_handler_context(fz_context *ctx)
{
  fz_archive_handler_context *registry = fz_malloc_struct(ctx, fz_archive_handler_context);

  /* Publish the registry before registering: fz_register_archive_handler
   * reads it back from ctx->archive. */
  ctx->archive = registry;
  registry->refs = 1;

  fz_register_archive_handler(ctx, &fz_zip_archive_handler);
  fz_register_archive_handler(ctx, &fz_tar_archive_handler);
#ifdef HAVE_LIBARCHIVE
  fz_register_archive_handler(ctx, &fz_libarchive_archive_handler);
#endif
  fz_register_archive_handler(ctx, &fz_cfb_archive_handler);
}
540
541
/*
 * Hand the context's handler registry and its refs counter to
 * fz_keep_imp and return the result. Returns NULL when ctx is NULL or
 * the context has no registry.
 */
fz_archive_handler_context *fz_keep_archive_handler_context(fz_context *ctx)
{
  if (ctx && ctx->archive)
    return fz_keep_imp(ctx, ctx->archive, &ctx->archive->refs);

  return NULL;
}
547
548
/*
 * Release the context's reference to its handler registry. Does nothing
 * when ctx is NULL. When fz_drop_imp reports that the registry should
 * be destroyed, it is freed and ctx->archive is reset to NULL.
 */
void fz_drop_archive_handler_context(fz_context *ctx)
{
  if (ctx == NULL)
    return;

  if (!fz_drop_imp(ctx, ctx->archive, &ctx->archive->refs))
    return;

  fz_free(ctx, ctx->archive);
  ctx->archive = NULL;
}
559
560
/*
 * Append `handler` to the context's archive handler registry.
 *
 * A NULL handler, or one that is already registered (compared by
 * pointer), is silently ignored. Throws FZ_ERROR_ARGUMENT when the
 * context has no registry and FZ_ERROR_LIMIT when the registry already
 * holds FZ_ARCHIVE_HANDLER_MAX handlers.
 */
void fz_register_archive_handler(fz_context *ctx, const fz_archive_handler *handler)
{
  fz_archive_handler_context *registry;
  int idx;

  if (handler == NULL)
    return;

  registry = ctx->archive;
  if (!registry)
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "archive handler list not found");

  /* Registering the same handler twice is a no-op. */
  for (idx = 0; idx < registry->count; idx++)
  {
    if (registry->handler[idx] == handler)
      return;
  }

  if (registry->count >= FZ_ARCHIVE_HANDLER_MAX)
    fz_throw(ctx, FZ_ERROR_LIMIT, "Too many archive handlers");

  registry->handler[registry->count] = handler;
  registry->count++;
}