Coverage Report

Created: 2023-11-27 07:08

/src/mupdf/source/xps/xps-doc.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2021 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "xps-imp.h"
25
26
#include <string.h>
27
#include <stdlib.h>
28
29
/* Relationship type URIs from the Microsoft XPS 2005/06 schema namespace. */
#define REL_START_PART "http://schemas.microsoft.com/xps/2005/06/fixedrepresentation"
#define REL_DOC_STRUCTURE "http://schemas.microsoft.com/xps/2005/06/documentstructure"
#define REL_REQUIRED_RESOURCE "http://schemas.microsoft.com/xps/2005/06/required-resource"
#define REL_REQUIRED_RESOURCE_RECURSIVE "http://schemas.microsoft.com/xps/2005/06/required-resource#recursive"

/* Relationship type URIs from the OpenXPS v1.0 schema namespace. */
#define REL_START_PART_OXPS "http://schemas.openxps.org/oxps/v1.0/fixedrepresentation"
#define REL_DOC_STRUCTURE_OXPS "http://schemas.openxps.org/oxps/v1.0/documentstructure"
42
43
static void
44
xps_rels_for_part(fz_context *ctx, xps_document *doc, char *buf, char *name, int buflen)
45
0
{
46
0
  char *p, *basename;
47
0
  p = strrchr(name, '/');
48
0
  basename = p ? p + 1 : name;
49
0
  fz_strlcpy(buf, name, buflen);
50
0
  p = strrchr(buf, '/');
51
0
  if (p) *p = 0;
52
0
  fz_strlcat(buf, "/_rels/", buflen);
53
0
  fz_strlcat(buf, basename, buflen);
54
0
  fz_strlcat(buf, ".rels", buflen);
55
0
}
56
57
/*
58
 * The FixedDocumentSequence and FixedDocument parts determine
59
 * which parts correspond to actual pages, and the page order.
60
 */
61
62
static void
63
xps_add_fixed_document(fz_context *ctx, xps_document *doc, char *name)
64
0
{
65
0
  xps_fixdoc *fixdoc;
66
67
  /* Check for duplicates first */
68
0
  for (fixdoc = doc->first_fixdoc; fixdoc; fixdoc = fixdoc->next)
69
0
    if (!strcmp(fixdoc->name, name))
70
0
      return;
71
72
0
  fixdoc = fz_malloc_struct(ctx, xps_fixdoc);
73
0
  fz_try(ctx)
74
0
  {
75
0
    fixdoc->name = fz_strdup(ctx, name);
76
0
    fixdoc->outline = NULL;
77
0
    fixdoc->next = NULL;
78
0
  }
79
0
  fz_catch(ctx)
80
0
  {
81
0
    fz_free(ctx, fixdoc);
82
0
    fz_rethrow(ctx);
83
0
  }
84
85
0
  if (!doc->first_fixdoc)
86
0
  {
87
0
    doc->first_fixdoc = fixdoc;
88
0
    doc->last_fixdoc = fixdoc;
89
0
  }
90
0
  else
91
0
  {
92
0
    doc->last_fixdoc->next = fixdoc;
93
0
    doc->last_fixdoc = fixdoc;
94
0
  }
95
0
}
96
97
static void
98
xps_add_fixed_page(fz_context *ctx, xps_document *doc, char *name, int width, int height)
99
0
{
100
0
  xps_fixpage *page;
101
102
  /* Check for duplicates first */
103
0
  for (page = doc->first_page; page; page = page->next)
104
0
    if (!strcmp(page->name, name))
105
0
      return;
106
107
0
  page = fz_malloc_struct(ctx, xps_fixpage);
108
0
  page->name = NULL;
109
110
0
  fz_try(ctx)
111
0
  {
112
0
    page->name = fz_strdup(ctx, name);
113
0
    page->number = doc->page_count++;
114
0
    page->width = width;
115
0
    page->height = height;
116
0
    page->next = NULL;
117
0
  }
118
0
  fz_catch(ctx)
119
0
  {
120
0
    fz_free(ctx, page->name);
121
0
    fz_free(ctx, page);
122
0
    fz_rethrow(ctx);
123
0
  }
124
125
0
  if (!doc->first_page)
126
0
  {
127
0
    doc->first_page = page;
128
0
    doc->last_page = page;
129
0
  }
130
0
  else
131
0
  {
132
0
    doc->last_page->next = page;
133
0
    doc->last_page = page;
134
0
  }
135
0
}
136
137
static void
138
xps_add_link_target(fz_context *ctx, xps_document *doc, char *name)
139
0
{
140
0
  xps_fixpage *page = doc->last_page;
141
0
  xps_target *target;
142
143
0
  if (page == NULL)
144
0
  {
145
0
    fz_warn(ctx, "Dropping link target with no page");
146
0
    return;
147
0
  }
148
149
0
  target = fz_malloc_struct(ctx, xps_target);
150
151
0
  fz_try(ctx)
152
0
  {
153
0
    target->name = fz_strdup(ctx, name);
154
0
    target->page = page->number;
155
0
    target->next = doc->target;
156
0
  }
157
0
  fz_catch(ctx)
158
0
  {
159
0
    fz_free(ctx, target);
160
0
    fz_rethrow(ctx);
161
0
  }
162
163
0
  doc->target = target;
164
0
}
165
166
fz_link_dest
167
xps_lookup_link_target(fz_context *ctx, fz_document *doc_, const char *target_uri)
168
0
{
169
0
  xps_document *doc = (xps_document*)doc_;
170
0
  xps_target *target;
171
0
  const char *needle = strrchr(target_uri, '#');
172
0
  needle = needle ? needle + 1 : target_uri;
173
0
  for (target = doc->target; target; target = target->next)
174
0
    if (!strcmp(target->name, needle))
175
0
      return fz_make_link_dest_xyz(0, target->page, 0, 0, 0);
176
0
  return fz_make_link_dest_xyz(0, fz_atoi(needle) - 1, 0, 0, 0);
177
0
}
178
179
static void
180
xps_drop_link_targets(fz_context *ctx, xps_document *doc)
181
0
{
182
0
  xps_target *target = doc->target, *next;
183
0
  while (target)
184
0
  {
185
0
    next = target->next;
186
0
    fz_free(ctx, target->name);
187
0
    fz_free(ctx, target);
188
0
    target = next;
189
0
  }
190
0
}
191
192
static void
193
xps_drop_fixed_pages(fz_context *ctx, xps_document *doc)
194
0
{
195
0
  xps_fixpage *page = doc->first_page;
196
0
  while (page)
197
0
  {
198
0
    xps_fixpage *next = page->next;
199
0
    fz_free(ctx, page->name);
200
0
    fz_free(ctx, page);
201
0
    page = next;
202
0
  }
203
0
  doc->first_page = NULL;
204
0
  doc->last_page = NULL;
205
0
}
206
207
static void
208
xps_drop_fixed_documents(fz_context *ctx, xps_document *doc)
209
0
{
210
0
  xps_fixdoc *fixdoc = doc->first_fixdoc;
211
0
  while (fixdoc)
212
0
  {
213
0
    xps_fixdoc *next = fixdoc->next;
214
0
    fz_free(ctx, fixdoc->name);
215
0
    fz_free(ctx, fixdoc->outline);
216
0
    fz_free(ctx, fixdoc);
217
0
    fixdoc = next;
218
0
  }
219
0
  doc->first_fixdoc = NULL;
220
0
  doc->last_fixdoc = NULL;
221
0
}
222
223
void
224
xps_drop_page_list(fz_context *ctx, xps_document *doc)
225
0
{
226
0
  xps_drop_fixed_documents(ctx, doc);
227
0
  xps_drop_fixed_pages(ctx, doc);
228
0
  xps_drop_link_targets(ctx, doc);
229
0
}
230
231
/*
232
 * Parse the fixed document sequence structure and _rels/.rels to find the start part.
233
 */
234
235
static void
236
xps_parse_metadata_imp(fz_context *ctx, xps_document *doc, fz_xml *item, xps_fixdoc *fixdoc)
237
0
{
238
0
  while (item)
239
0
  {
240
0
    if (fz_xml_is_tag(item, "Relationship"))
241
0
    {
242
0
      char *target = fz_xml_att(item, "Target");
243
0
      char *type = fz_xml_att(item, "Type");
244
0
      if (target && type)
245
0
      {
246
0
        char tgtbuf[1024];
247
0
        xps_resolve_url(ctx, doc, tgtbuf, doc->base_uri, target, sizeof tgtbuf);
248
0
        if (!strcmp(type, REL_START_PART) || !strcmp(type, REL_START_PART_OXPS))
249
0
        {
250
0
          fz_free(ctx, doc->start_part);
251
0
          doc->start_part = fz_strdup(ctx, tgtbuf);
252
0
        }
253
0
        if ((!strcmp(type, REL_DOC_STRUCTURE) || !strcmp(type, REL_DOC_STRUCTURE_OXPS)) && fixdoc)
254
0
          fixdoc->outline = fz_strdup(ctx, tgtbuf);
255
0
        if (!fz_xml_att(item, "Id"))
256
0
          fz_warn(ctx, "missing relationship id for %s", target);
257
0
      }
258
0
    }
259
260
0
    if (fz_xml_is_tag(item, "DocumentReference"))
261
0
    {
262
0
      char *source = fz_xml_att(item, "Source");
263
0
      if (source)
264
0
      {
265
0
        char srcbuf[1024];
266
0
        xps_resolve_url(ctx, doc, srcbuf, doc->base_uri, source, sizeof srcbuf);
267
0
        xps_add_fixed_document(ctx, doc, srcbuf);
268
0
      }
269
0
    }
270
271
0
    if (fz_xml_is_tag(item, "PageContent"))
272
0
    {
273
0
      char *source = fz_xml_att(item, "Source");
274
0
      char *width_att = fz_xml_att(item, "Width");
275
0
      char *height_att = fz_xml_att(item, "Height");
276
0
      int width = width_att ? atoi(width_att) : 0;
277
0
      int height = height_att ? atoi(height_att) : 0;
278
0
      if (source)
279
0
      {
280
0
        char srcbuf[1024];
281
0
        xps_resolve_url(ctx, doc, srcbuf, doc->base_uri, source, sizeof srcbuf);
282
0
        xps_add_fixed_page(ctx, doc, srcbuf, width, height);
283
0
      }
284
0
    }
285
286
0
    if (fz_xml_is_tag(item, "LinkTarget"))
287
0
    {
288
0
      char *name = fz_xml_att(item, "Name");
289
0
      if (name)
290
0
        xps_add_link_target(ctx, doc, name);
291
0
    }
292
293
0
    xps_parse_metadata_imp(ctx, doc, fz_xml_down(item), fixdoc);
294
295
0
    item = fz_xml_next(item);
296
0
  }
297
0
}
298
299
static void
300
xps_parse_metadata(fz_context *ctx, xps_document *doc, xps_part *part, xps_fixdoc *fixdoc)
301
0
{
302
0
  fz_xml_doc *xml;
303
0
  char buf[1024];
304
0
  char *s;
305
306
  /* Save directory name part */
307
0
  fz_strlcpy(buf, part->name, sizeof buf);
308
0
  s = strrchr(buf, '/');
309
0
  if (s)
310
0
    s[0] = 0;
311
312
  /* _rels parts are voodoo: their URI references are from
313
   * the part they are associated with, not the actual _rels
314
   * part being parsed.
315
   */
316
0
  s = strstr(buf, "/_rels");
317
0
  if (s)
318
0
    *s = 0;
319
320
0
  doc->base_uri = buf;
321
0
  doc->part_uri = part->name;
322
323
0
  xml = fz_parse_xml(ctx, part->data, 0);
324
0
  fz_try(ctx)
325
0
  {
326
0
    xps_parse_metadata_imp(ctx, doc, fz_xml_root(xml), fixdoc);
327
0
  }
328
0
  fz_always(ctx)
329
0
  {
330
0
    fz_drop_xml(ctx, xml);
331
0
    doc->base_uri = NULL;
332
0
    doc->part_uri = NULL;
333
0
  }
334
0
  fz_catch(ctx)
335
0
    fz_rethrow(ctx);
336
0
}
337
338
static void
339
xps_read_and_process_metadata_part(fz_context *ctx, xps_document *doc, char *name, xps_fixdoc *fixdoc)
340
0
{
341
0
  xps_part *part;
342
343
0
  if (!xps_has_part(ctx, doc, name))
344
0
    return;
345
346
0
  part = xps_read_part(ctx, doc, name);
347
0
  fz_try(ctx)
348
0
  {
349
0
    xps_parse_metadata(ctx, doc, part, fixdoc);
350
0
  }
351
0
  fz_always(ctx)
352
0
  {
353
0
    xps_drop_part(ctx, doc, part);
354
0
  }
355
0
  fz_catch(ctx)
356
0
  {
357
0
    fz_rethrow(ctx);
358
0
  }
359
0
}
360
361
void
362
xps_read_page_list(fz_context *ctx, xps_document *doc)
363
0
{
364
0
  xps_fixdoc *fixdoc;
365
366
0
  xps_read_and_process_metadata_part(ctx, doc, "/_rels/.rels", NULL);
367
368
0
  if (!doc->start_part)
369
0
    fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find fixed document sequence start part");
370
371
0
  xps_read_and_process_metadata_part(ctx, doc, doc->start_part, NULL);
372
373
0
  for (fixdoc = doc->first_fixdoc; fixdoc; fixdoc = fixdoc->next)
374
0
  {
375
0
    char relbuf[1024];
376
0
    fz_try(ctx)
377
0
    {
378
0
      xps_rels_for_part(ctx, doc, relbuf, fixdoc->name, sizeof relbuf);
379
0
      xps_read_and_process_metadata_part(ctx, doc, relbuf, fixdoc);
380
0
    }
381
0
    fz_catch(ctx)
382
0
    {
383
0
      fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
384
0
      fz_rethrow_if(ctx, FZ_ERROR_MEMORY);
385
0
      fz_report_error(ctx);
386
0
      fz_warn(ctx, "cannot process FixedDocument rels part");
387
0
    }
388
0
    xps_read_and_process_metadata_part(ctx, doc, fixdoc->name, fixdoc);
389
0
  }
390
0
}
391
392
int
393
xps_count_pages(fz_context *ctx, fz_document *doc_, int chapter)
394
0
{
395
0
  xps_document *doc = (xps_document*)doc_;
396
0
  return doc->page_count;
397
0
}
398
399
static fz_xml_doc *
400
xps_load_fixed_page(fz_context *ctx, xps_document *doc, xps_fixpage *page)
401
0
{
402
0
  xps_part *part;
403
0
  fz_xml_doc *xml = NULL;
404
0
  fz_xml *root;
405
0
  char *width_att;
406
0
  char *height_att;
407
408
0
  part = xps_read_part(ctx, doc, page->name);
409
0
  fz_try(ctx)
410
0
  {
411
0
    xml = fz_parse_xml(ctx, part->data, 0);
412
413
0
    root = fz_xml_root(xml);
414
0
    if (!root)
415
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "FixedPage missing root element");
416
417
0
    if (fz_xml_is_tag(root, "AlternateContent"))
418
0
    {
419
0
      fz_xml *node = xps_lookup_alternate_content(ctx, doc, root);
420
0
      if (!node)
421
0
        fz_throw(ctx, FZ_ERROR_GENERIC, "FixedPage missing alternate root element");
422
0
      fz_detach_xml(ctx, node);
423
0
      root = node;
424
0
    }
425
426
0
    if (!fz_xml_is_tag(root, "FixedPage"))
427
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "expected FixedPage element");
428
0
    width_att = fz_xml_att(root, "Width");
429
0
    if (!width_att)
430
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "FixedPage missing required attribute: Width");
431
0
    height_att = fz_xml_att(root, "Height");
432
0
    if (!height_att)
433
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "FixedPage missing required attribute: Height");
434
435
0
    page->width = atoi(width_att);
436
0
    page->height = atoi(height_att);
437
0
  }
438
0
  fz_always(ctx)
439
0
  {
440
0
    xps_drop_part(ctx, doc, part);
441
0
  }
442
0
  fz_catch(ctx)
443
0
  {
444
0
    fz_drop_xml(ctx, xml);
445
0
    fz_rethrow(ctx);
446
0
  }
447
448
0
  return xml;
449
0
}
450
451
static fz_rect
452
xps_bound_page(fz_context *ctx, fz_page *page_, fz_box_type box)
453
0
{
454
0
  xps_page *page = (xps_page*)page_;
455
0
  fz_rect bounds;
456
0
  bounds.x0 = bounds.y0 = 0;
457
0
  bounds.x1 = page->fix->width * 72.0f / 96.0f;
458
0
  bounds.y1 = page->fix->height * 72.0f / 96.0f;
459
0
  return bounds;
460
0
}
461
462
static void
463
xps_drop_page_imp(fz_context *ctx, fz_page *page_)
464
0
{
465
0
  xps_page *page = (xps_page*)page_;
466
0
  fz_drop_xml(ctx, page->xml);
467
0
}
468
469
fz_page *
470
xps_load_page(fz_context *ctx, fz_document *doc_, int chapter, int number)
471
0
{
472
0
  xps_document *doc = (xps_document*)doc_;
473
0
  xps_page *page = NULL;
474
0
  xps_fixpage *fix;
475
0
  fz_xml_doc *xml;
476
0
  int n = 0;
477
478
0
  fz_var(page);
479
480
0
  for (fix = doc->first_page; fix; fix = fix->next)
481
0
  {
482
0
    if (n == number)
483
0
    {
484
0
      xml = xps_load_fixed_page(ctx, doc, fix);
485
0
      fz_try(ctx)
486
0
      {
487
0
        page = fz_new_derived_page(ctx, xps_page, doc_);
488
0
        page->super.load_links = xps_load_links;
489
0
        page->super.bound_page = xps_bound_page;
490
0
        page->super.run_page_contents = xps_run_page;
491
0
        page->super.drop_page = xps_drop_page_imp;
492
493
0
        page->fix = fix;
494
0
        page->xml = xml;
495
0
      }
496
0
      fz_catch(ctx)
497
0
      {
498
0
        fz_drop_xml(ctx, xml);
499
0
        fz_rethrow(ctx);
500
0
      }
501
0
      return (fz_page*)page;
502
0
    }
503
0
    n ++;
504
0
  }
505
506
0
  fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find page %d", number + 1);
507
0
}
508
509
static int
510
xps_recognize(fz_context *ctx, const char *magic)
511
5.42k
{
512
5.42k
  if (strstr(magic, "/_rels/.rels") || strstr(magic, "\\_rels\\.rels"))
513
0
    return 100;
514
5.42k
  return 0;
515
5.42k
}
516
517
/* NULL-terminated list of file extensions listed in the XPS handler. */
static const char *xps_extensions[] = { "oxps", "xps", NULL };
523
524
/* NULL-terminated list of MIME types listed in the XPS handler. */
static const char *xps_mimetypes[] = {
  "application/oxps",
  "application/vnd.ms-xpsdocument",
  "application/xps",
  NULL,
};
531
532
static int
533
xps_recognize_doc_content(fz_context *ctx, fz_stream *stream)
534
17.7k
{
535
17.7k
  fz_archive *arch = NULL;
536
17.7k
  int ret = 0;
537
17.7k
  fz_xml *xml = NULL;
538
17.7k
  fz_xml *pos;
539
540
17.7k
  fz_var(arch);
541
17.7k
  fz_var(ret);
542
17.7k
  fz_var(xml);
543
544
35.5k
  fz_try(ctx)
545
35.5k
  {
546
17.7k
    arch = fz_try_open_archive_with_stream(ctx, stream);
547
548
17.7k
    xml = fz_try_parse_xml_archive_entry(ctx, arch, "/_rels/.rels", 0);
549
17.7k
    if (xml == NULL)
550
17.7k
      xml = fz_try_parse_xml_archive_entry(ctx, arch, "\\_rels\\.rels", 0);
551
552
17.7k
    if (xml == NULL)
553
17.7k
      break;
554
555
39
    pos = fz_xml_find_dfs(xml, "Relationship", "Type", "http://schemas.microsoft.com/xps/2005/06/fixedrepresentation");
556
39
    if (pos)
557
0
      ret = 100;
558
39
  }
559
35.5k
  fz_always(ctx)
560
17.7k
  {
561
17.7k
    fz_drop_xml(ctx, xml);
562
17.7k
    fz_drop_archive(ctx, arch);
563
17.7k
  }
564
17.7k
  fz_catch(ctx)
565
16
    fz_rethrow(ctx);
566
567
17.7k
  return ret;
568
17.7k
}
569
570
fz_document_handler xps_document_handler =
571
{
572
  xps_recognize,
573
  xps_open_document,
574
  xps_open_document_with_stream,
575
  xps_extensions,
576
  xps_mimetypes,
577
  NULL,
578
  NULL,
579
  xps_recognize_doc_content
580
};