Coverage Report

Created: 2023-06-07 06:20

/src/mupdf/source/html/html-doc.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2022 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "html-imp.h"
25
26
#include <string.h>
27
#include <math.h>
28
29
/* Indices into a four-entry margin array (top, right, bottom, left), as used with page_margin[]. */
enum
{
  T,
  R,
  B,
  L
};
30
31
/* Input flavours understood by this file; selects the parser and the metadata callback. */
enum
{
  FORMAT_FB2,
  FORMAT_XHTML,
  FORMAT_HTML5,
  FORMAT_MOBI
};
32
33
typedef struct
34
{
35
  fz_document super;
36
  fz_archive *zip;
37
  fz_html_font_set *set;
38
  fz_html *html;
39
  fz_outline *outline;
40
} html_document;
41
42
typedef struct
43
{
44
  fz_page super;
45
  html_document *doc;
46
  int number;
47
} html_page;
48
49
static void
50
htdoc_drop_document(fz_context *ctx, fz_document *doc_)
51
0
{
52
0
  html_document *doc = (html_document*)doc_;
53
0
  fz_drop_archive(ctx, doc->zip);
54
0
  fz_drop_html(ctx, doc->html);
55
0
  fz_drop_html_font_set(ctx, doc->set);
56
0
  fz_drop_outline(ctx, doc->outline);
57
0
}
58
59
static fz_link_dest
60
htdoc_resolve_link(fz_context *ctx, fz_document *doc_, const char *dest)
61
0
{
62
0
  html_document *doc = (html_document*)doc_;
63
0
  const char *s = strchr(dest, '#');
64
0
  if (s && s[1] != 0)
65
0
  {
66
0
    float y = fz_find_html_target(ctx, doc->html, s+1);
67
0
    if (y >= 0)
68
0
    {
69
0
      int page = y / doc->html->page_h;
70
0
      return fz_make_link_dest_xyz(0, page, 0, y - page * doc->html->page_h, 0);
71
0
    }
72
0
  }
73
74
0
  return fz_make_link_dest_none();
75
0
}
76
77
static int
78
htdoc_count_pages(fz_context *ctx, fz_document *doc_, int chapter)
79
0
{
80
0
  html_document *doc = (html_document*)doc_;
81
0
  if (doc->html->tree.root->s.layout.b > 0)
82
0
    return ceilf(doc->html->tree.root->s.layout.b / doc->html->page_h);
83
0
  return 1;
84
0
}
85
86
static void
87
htdoc_update_outline(fz_context *ctx, fz_document *doc, fz_outline *node)
88
0
{
89
0
  while (node)
90
0
  {
91
0
    fz_link_dest dest = htdoc_resolve_link(ctx, doc, node->uri);
92
0
    node->page = dest.loc;
93
0
    node->x = dest.x;
94
0
    node->y = dest.y;
95
0
    htdoc_update_outline(ctx, doc, node->down);
96
0
    node = node->next;
97
0
  }
98
0
}
99
100
static void
101
htdoc_layout(fz_context *ctx, fz_document *doc_, float w, float h, float em)
102
0
{
103
0
  html_document *doc = (html_document*)doc_;
104
105
0
  fz_layout_html(ctx, doc->html, w, h, em);
106
107
0
  htdoc_update_outline(ctx, doc_, doc->outline);
108
0
}
109
110
static void
111
htdoc_drop_page(fz_context *ctx, fz_page *page_)
112
0
{
113
0
}
114
115
static fz_rect
116
htdoc_bound_page(fz_context *ctx, fz_page *page_)
117
0
{
118
0
  html_page *page = (html_page*)page_;
119
0
  html_document *doc = page->doc;
120
0
  fz_rect bbox;
121
0
  bbox.x0 = 0;
122
0
  bbox.y0 = 0;
123
0
  bbox.x1 = doc->html->page_w + doc->html->page_margin[L] + doc->html->page_margin[R];
124
0
  bbox.y1 = doc->html->page_h + doc->html->page_margin[T] + doc->html->page_margin[B];
125
0
  return bbox;
126
0
}
127
128
static void
129
htdoc_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, fz_matrix ctm, fz_cookie *cookie)
130
0
{
131
0
  html_page *page = (html_page*)page_;
132
0
  html_document *doc = page->doc;
133
0
  fz_draw_html(ctx, dev, ctm, doc->html, page->number);
134
0
}
135
136
static fz_link *
137
htdoc_load_links(fz_context *ctx, fz_page *page_)
138
0
{
139
0
  html_page *page = (html_page*)page_;
140
0
  html_document *doc = page->doc;
141
0
  return fz_load_html_links(ctx, doc->html, page->number, "");
142
0
}
143
144
static fz_bookmark
145
htdoc_make_bookmark(fz_context *ctx, fz_document *doc_, fz_location loc)
146
0
{
147
0
  html_document *doc = (html_document*)doc_;
148
0
  return fz_make_html_bookmark(ctx, doc->html, loc.page);
149
0
}
150
151
static fz_location
152
htdoc_lookup_bookmark(fz_context *ctx, fz_document *doc_, fz_bookmark mark)
153
0
{
154
0
  html_document *doc = (html_document*)doc_;
155
0
  return fz_make_location(0, fz_lookup_html_bookmark(ctx, doc->html, mark));
156
0
}
157
158
static fz_page *
159
htdoc_load_page(fz_context *ctx, fz_document *doc_, int chapter, int number)
160
0
{
161
0
  html_document *doc = (html_document*)doc_;
162
0
  html_page *page = fz_new_derived_page(ctx, html_page, doc_);
163
0
  page->super.bound_page = htdoc_bound_page;
164
0
  page->super.run_page_contents = htdoc_run_page;
165
0
  page->super.load_links = htdoc_load_links;
166
0
  page->super.drop_page = htdoc_drop_page;
167
0
  page->doc = doc;
168
0
  page->number = number;
169
0
  return (fz_page*)page;
170
0
}
171
172
static fz_outline *
173
htdoc_load_outline(fz_context *ctx, fz_document *doc_)
174
0
{
175
0
  html_document *doc = (html_document*)doc_;
176
0
  return fz_keep_outline(ctx, doc->outline);
177
0
}
178
179
static int
180
fb2doc_lookup_metadata(fz_context *ctx, fz_document *doc_, const char *key, char *buf, int size)
181
0
{
182
0
  html_document *doc = (html_document*)doc_;
183
0
  if (!strcmp(key, FZ_META_FORMAT))
184
0
    return 1 + (int)fz_strlcpy(buf, "FictionBook2", size);
185
0
  if (!strcmp(key, FZ_META_INFO_TITLE) && doc->html->title)
186
0
    return 1 + (int)fz_strlcpy(buf, doc->html->title, size);
187
0
  return -1;
188
0
}
189
190
static int
191
htdoc_lookup_metadata(fz_context *ctx, fz_document *doc_, const char *key, char *buf, int size)
192
0
{
193
0
  html_document *doc = (html_document*)doc_;
194
0
  if (!strcmp(key, FZ_META_FORMAT))
195
0
    return (int)fz_strlcpy(buf, "HTML5", size);
196
0
  if (!strcmp(key, FZ_META_INFO_TITLE) && doc->html->title)
197
0
    return 1 + (int)fz_strlcpy(buf, doc->html->title, size);
198
0
  return -1;
199
0
}
200
201
static int
202
xhtdoc_lookup_metadata(fz_context *ctx, fz_document *doc_, const char *key, char *buf, int size)
203
0
{
204
0
  html_document *doc = (html_document*)doc_;
205
0
  if (!strcmp(key, FZ_META_FORMAT))
206
0
    return (int)fz_strlcpy(buf, "XHTML", size);
207
0
  if (!strcmp(key, FZ_META_INFO_TITLE) && doc->html->title)
208
0
    return 1 + (int)fz_strlcpy(buf, doc->html->title, size);
209
0
  return -1;
210
0
}
211
212
static int
213
mobi_lookup_metadata(fz_context *ctx, fz_document *doc_, const char *key, char *buf, int size)
214
0
{
215
0
  html_document *doc = (html_document*)doc_;
216
0
  if (!strcmp(key, FZ_META_FORMAT))
217
0
    return (int)fz_strlcpy(buf, "MOBI", size);
218
0
  if (!strcmp(key, FZ_META_INFO_TITLE) && doc->html->title)
219
0
    return 1 + (int)fz_strlcpy(buf, doc->html->title, size);
220
0
  return -1;
221
0
}
222
223
static fz_document *
224
htdoc_open_document_with_buffer(fz_context *ctx, fz_archive *zip, fz_buffer *buf, int format)
225
0
{
226
0
  html_document *doc = fz_new_derived_document(ctx, html_document);
227
0
  doc->super.drop_document = htdoc_drop_document;
228
0
  doc->super.layout = htdoc_layout;
229
0
  doc->super.load_outline = htdoc_load_outline;
230
0
  doc->super.resolve_link_dest = htdoc_resolve_link;
231
0
  doc->super.make_bookmark = htdoc_make_bookmark;
232
0
  doc->super.lookup_bookmark = htdoc_lookup_bookmark;
233
0
  doc->super.count_pages = htdoc_count_pages;
234
0
  doc->super.load_page = htdoc_load_page;
235
0
  switch (format)
236
0
  {
237
0
  case FORMAT_FB2: doc->super.lookup_metadata = fb2doc_lookup_metadata; break;
238
0
  case FORMAT_HTML5: doc->super.lookup_metadata = htdoc_lookup_metadata; break;
239
0
  case FORMAT_XHTML: doc->super.lookup_metadata = xhtdoc_lookup_metadata; break;
240
0
  case FORMAT_MOBI: doc->super.lookup_metadata = mobi_lookup_metadata; break;
241
0
  }
242
0
  doc->super.is_reflowable = 1;
243
244
0
  fz_try(ctx)
245
0
  {
246
0
    doc->zip = zip;
247
0
    doc->set = fz_new_html_font_set(ctx);
248
0
    switch (format)
249
0
    {
250
0
    case FORMAT_FB2: doc->html = fz_parse_fb2(ctx, doc->set, doc->zip, ".", buf, fz_user_css(ctx)); break;
251
0
    case FORMAT_HTML5: doc->html = fz_parse_html5(ctx, doc->set, doc->zip, ".", buf, fz_user_css(ctx)); break;
252
0
    case FORMAT_XHTML: doc->html = fz_parse_xhtml(ctx, doc->set, doc->zip, ".", buf, fz_user_css(ctx)); break;
253
0
    case FORMAT_MOBI: doc->html = fz_parse_mobi(ctx, doc->set, doc->zip, ".", buf, fz_user_css(ctx)); break;
254
0
    }
255
0
    doc->outline = fz_load_html_outline(ctx, doc->html);
256
0
  }
257
0
  fz_always(ctx)
258
0
    fz_drop_buffer(ctx, buf);
259
0
  fz_catch(ctx)
260
0
  {
261
0
    fz_drop_document(ctx, &doc->super);
262
0
    fz_rethrow(ctx);
263
0
  }
264
265
0
  return (fz_document*)doc;
266
0
}
267
268
static fz_document *
269
htdoc_open_document_with_stream(fz_context *ctx, fz_stream *file)
270
0
{
271
0
  return htdoc_open_document_with_buffer(ctx, fz_open_directory(ctx, "."), fz_read_all(ctx, file, 0), FORMAT_HTML5);
272
0
}
273
274
static fz_document *
275
htdoc_open_document(fz_context *ctx, const char *filename)
276
0
{
277
0
  char dirname[2048];
278
0
  fz_dirname(dirname, filename, sizeof dirname);
279
0
  return htdoc_open_document_with_buffer(ctx, fz_open_directory(ctx, dirname), fz_read_file(ctx, filename), FORMAT_HTML5);
280
0
}
281
282
static const char *htdoc_extensions[] =
283
{
284
  "htm",
285
  "html",
286
  NULL
287
};
288
289
static const char *htdoc_mimetypes[] =
290
{
291
  "text/html",
292
  NULL
293
};
294
295
fz_document_handler html_document_handler =
296
{
297
  NULL,
298
  htdoc_open_document,
299
  htdoc_open_document_with_stream,
300
  htdoc_extensions,
301
  htdoc_mimetypes,
302
  NULL,
303
  NULL,
304
};
305
306
static fz_document *
307
xhtdoc_open_document_with_stream(fz_context *ctx, fz_stream *file)
308
0
{
309
0
  return htdoc_open_document_with_buffer(ctx, fz_open_directory(ctx, "."), fz_read_all(ctx, file, 0), FORMAT_XHTML);
310
0
}
311
312
static fz_document *
313
xhtdoc_open_document(fz_context *ctx, const char *filename)
314
0
{
315
0
  char dirname[2048];
316
0
  fz_dirname(dirname, filename, sizeof dirname);
317
0
  return htdoc_open_document_with_buffer(ctx, fz_open_directory(ctx, dirname), fz_read_file(ctx, filename), FORMAT_XHTML);
318
0
}
319
320
static const char *xhtdoc_extensions[] =
321
{
322
  "xhtml",
323
  NULL
324
};
325
326
static const char *xhtdoc_mimetypes[] =
327
{
328
  "application/xhtml+xml",
329
  NULL
330
};
331
332
fz_document_handler xhtml_document_handler =
333
{
334
  NULL,
335
  xhtdoc_open_document,
336
  xhtdoc_open_document_with_stream,
337
  xhtdoc_extensions,
338
  xhtdoc_mimetypes
339
};
340
341
static fz_document *
342
fb2doc_open_document_with_stream(fz_context *ctx, fz_stream *file)
343
0
{
344
0
  return htdoc_open_document_with_buffer(ctx, NULL, fz_read_all(ctx, file, 0), FORMAT_FB2);
345
0
}
346
347
static fz_document *
348
fb2doc_open_document(fz_context *ctx, const char *filename)
349
0
{
350
0
  return htdoc_open_document_with_buffer(ctx, NULL, fz_read_file(ctx, filename), FORMAT_FB2);
351
0
}
352
353
static const char *fb2doc_extensions[] =
354
{
355
  "fb2",
356
  "xml",
357
  NULL
358
};
359
360
static const char *fb2doc_mimetypes[] =
361
{
362
  "application/x-fictionbook",
363
  "application/xml",
364
  "text/xml",
365
  NULL
366
};
367
368
fz_document_handler fb2_document_handler =
369
{
370
  NULL,
371
  fb2doc_open_document,
372
  fb2doc_open_document_with_stream,
373
  fb2doc_extensions,
374
  fb2doc_mimetypes
375
};
376
377
static fz_document *
378
mobi_open_document_with_buffer(fz_context *ctx, fz_buffer *mobi)
379
0
{
380
0
  fz_archive *zip = NULL;
381
0
  fz_buffer *html;
382
0
  fz_var(zip);
383
0
  fz_try(ctx)
384
0
  {
385
0
    zip = fz_extract_html_from_mobi(ctx, mobi);
386
0
    html = fz_read_archive_entry(ctx, zip, "index.html");
387
0
  }
388
0
  fz_always(ctx)
389
0
  {
390
0
    fz_drop_buffer(ctx, mobi);
391
0
  }
392
0
  fz_catch(ctx)
393
0
  {
394
0
    fz_drop_archive(ctx, zip);
395
0
    fz_rethrow(ctx);
396
0
  }
397
0
  return htdoc_open_document_with_buffer(ctx, zip, html, FORMAT_MOBI);
398
0
}
399
400
static fz_document *
401
mobi_open_document_with_stream(fz_context *ctx, fz_stream *file)
402
0
{
403
0
  return mobi_open_document_with_buffer(ctx, fz_read_all(ctx, file, 0));
404
0
}
405
406
static fz_document *
407
mobi_open_document(fz_context *ctx, const char *filename)
408
0
{
409
0
  return mobi_open_document_with_buffer(ctx, fz_read_file(ctx, filename));
410
0
}
411
412
static const char *mobi_extensions[] =
413
{
414
  "mobi",
415
  "prc",
416
  "pdb",
417
  NULL
418
};
419
420
static const char *mobi_mimetypes[] =
421
{
422
  "application/x-mobipocket-ebook",
423
  NULL
424
};
425
426
fz_document_handler mobi_document_handler =
427
{
428
  NULL,
429
  mobi_open_document,
430
  mobi_open_document_with_stream,
431
  mobi_extensions,
432
  mobi_mimetypes
433
};