/src/mupdf/source/html/epub-doc.c
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (C) 2004-2024 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | #include "html-imp.h" |
25 | | |
26 | | #include <string.h> |
27 | | #include <math.h> |
28 | | |
29 | | #include <zlib.h> /* for crc32 */ |
30 | | |
/* Indices into four-sided arrays such as html->page_margin[]:
 * top, right, bottom, left. */
enum
{
	T,
	R,
	B,
	L
};
32 | | |
/* Forward declarations; the structures themselves are defined below. */
typedef struct epub_page epub_page;
typedef struct epub_chapter epub_chapter;
35 | | |
/*
 * Cache of per-chapter page counts, together with the layout
 * parameters and CSS state those counts were computed for.
 */
typedef struct
{
	/* Number of entries allocated in pages_in_chapter. */
	int max_chapters;
	/* Number of leading entries of pages_in_chapter that are in use. */
	int num_chapters;
	/* Layout width, height and em size the counts belong to. */
	float layout_w;
	float layout_h;
	float layout_em;
	/* Checksum of the user CSS the counts belong to. */
	uint32_t css_sum;
	/* Value of fz_use_document_css() the counts belong to. */
	int use_doc_css;
	/* Page count for each chapter; -1 means "not known". */
	int *pages_in_chapter;
} epub_accelerator;
47 | | |
48 | | typedef struct |
49 | | { |
50 | | fz_document super; |
51 | | fz_archive *zip; |
52 | | fz_html_font_set *set; |
53 | | int count; |
54 | | epub_chapter *spine; |
55 | | fz_outline *outline; |
56 | | char *dc_title, *dc_creator; |
57 | | float layout_w, layout_h, layout_em; |
58 | | epub_accelerator *accel; |
59 | | uint32_t css_sum; |
60 | | |
61 | | /* A common pattern of use is for us to open a document, |
62 | | * load a page, draw it, drop it, load the next page, |
63 | | * draw it, drop it etc. This means that the HTML for |
64 | | * a chapter might get thrown away between the drop and |
65 | | * the the next load (if the chapter is large, and the |
66 | | * store size is low). Accordingly, we store a handle |
67 | | * to the most recently used html block here, thus |
68 | | * ensuring that the stored copy won't be evicted. */ |
69 | | fz_html *most_recent_html; |
70 | | } epub_document; |
71 | | |
72 | | struct epub_chapter |
73 | | { |
74 | | epub_document *doc; |
75 | | char *path; |
76 | | int number; |
77 | | epub_chapter *next; |
78 | | }; |
79 | | |
80 | | struct epub_page |
81 | | { |
82 | | fz_page super; |
83 | | epub_chapter *ch; |
84 | | int number; |
85 | | fz_html *html; |
86 | | }; |
87 | | |
88 | | static uint32_t |
89 | | user_css_sum(fz_context *ctx) |
90 | 0 | { |
91 | 0 | uint32_t sum = 0; |
92 | 0 | const char *css = fz_user_css(ctx); |
93 | 0 | sum = crc32(0, NULL, 0); |
94 | 0 | if (css) |
95 | 0 | sum = crc32(sum, (Byte*)css, (int)strlen(css)); |
96 | 0 | return sum; |
97 | 0 | } |
98 | | |
/* Placeholder whose address is stored as the value for every key in
 * the tree of encrypted entries; lookups only test for non-NULL. */
static int dummy = 1;
100 | | |
101 | | struct encrypted { |
102 | | fz_archive super; |
103 | | fz_archive *chain; |
104 | | fz_tree *info; |
105 | | }; |
106 | | |
107 | | static int has_encrypted_entry(fz_context *ctx, fz_archive *arch_, const char *name) |
108 | 0 | { |
109 | 0 | struct encrypted *arch = (struct encrypted *)arch_; |
110 | 0 | return fz_has_archive_entry(ctx, arch->chain, name); |
111 | 0 | } |
112 | | |
113 | | static fz_stream *open_encrypted_entry(fz_context *ctx, fz_archive *arch_, const char *name) |
114 | 0 | { |
115 | 0 | struct encrypted *arch = (struct encrypted *)arch_; |
116 | 0 | if (fz_tree_lookup(ctx, arch->info, name)) |
117 | 0 | return NULL; |
118 | 0 | return fz_open_archive_entry(ctx, arch->chain, name); |
119 | 0 | } |
120 | | |
121 | | static fz_buffer *read_encrypted_entry(fz_context *ctx, fz_archive *arch_, const char *name) |
122 | 0 | { |
123 | 0 | struct encrypted *arch = (struct encrypted *)arch_; |
124 | 0 | if (fz_tree_lookup(ctx, arch->info, name)) |
125 | 0 | return NULL; |
126 | 0 | return fz_read_archive_entry(ctx, arch->chain, name); |
127 | 0 | } |
128 | | |
129 | | static void drop_encrypted_archive(fz_context *ctx, fz_archive *arch_) |
130 | 0 | { |
131 | 0 | struct encrypted *arch = (struct encrypted *)arch_; |
132 | 0 | fz_drop_tree(ctx, arch->info, NULL); |
133 | 0 | fz_drop_archive(ctx, arch->chain); |
134 | 0 | } |
135 | | |
136 | | static fz_archive *new_encrypted_archive(fz_context *ctx, fz_archive *chain, fz_tree *info) |
137 | 0 | { |
138 | 0 | struct encrypted *arch; |
139 | |
|
140 | 0 | arch = fz_new_derived_archive(ctx, NULL, struct encrypted); |
141 | 0 | arch->super.format = "encrypted"; |
142 | 0 | arch->super.has_entry = has_encrypted_entry; |
143 | 0 | arch->super.read_entry = read_encrypted_entry; |
144 | 0 | arch->super.open_entry = open_encrypted_entry; |
145 | 0 | arch->super.drop_archive = drop_encrypted_archive; |
146 | 0 | arch->chain = chain; |
147 | 0 | arch->info = info; |
148 | |
|
149 | 0 | return &arch->super; |
150 | 0 | } |
151 | | |
152 | | static void |
153 | | epub_parse_encryption(fz_context *ctx, epub_document *doc, fz_xml *root) |
154 | 0 | { |
155 | 0 | fz_tree *info = NULL; |
156 | 0 | fz_xml *edata; |
157 | |
|
158 | 0 | for (edata = fz_xml_find_down(root, "EncryptedData"); edata; edata = fz_xml_find_next(edata, "EncryptedData")) |
159 | 0 | { |
160 | 0 | fz_xml *cdata = fz_xml_find_down(edata, "CipherData"); |
161 | 0 | fz_xml *cref = fz_xml_find_down(cdata, "CipherReference"); |
162 | 0 | char *uri = fz_xml_att(cref, "URI"); |
163 | 0 | if (uri) |
164 | 0 | { |
165 | | // TODO: Support reading EncryptedKey and EncryptionMethod to decrypt content. |
166 | 0 | info = fz_tree_insert(ctx, info, uri, &dummy); |
167 | 0 | } |
168 | 0 | } |
169 | |
|
170 | 0 | if (info) |
171 | 0 | { |
172 | 0 | doc->zip = new_encrypted_archive(ctx, doc->zip, info); |
173 | 0 | } |
174 | 0 | } |
175 | | |
176 | | static fz_html *epub_get_laid_out_html(fz_context *ctx, epub_document *doc, epub_chapter *ch); |
177 | | |
178 | | static int count_laid_out_pages(fz_html *html) |
179 | 0 | { |
180 | 0 | if (html->tree.root->s.layout.b > 0) |
181 | 0 | return ceilf(html->tree.root->s.layout.b / html->page_h); |
182 | 0 | return 1; |
183 | 0 | } |
184 | | |
185 | | static void |
186 | | invalidate_accelerator(fz_context *ctx, epub_accelerator *acc) |
187 | 0 | { |
188 | 0 | int i; |
189 | |
|
190 | 0 | for (i = 0; i < acc->max_chapters; i++) |
191 | 0 | acc->pages_in_chapter[i] = -1; |
192 | 0 | } |
193 | | |
194 | | static int count_chapter_pages(fz_context *ctx, epub_document *doc, epub_chapter *ch) |
195 | 0 | { |
196 | 0 | epub_accelerator *acc = doc->accel; |
197 | 0 | int use_doc_css = fz_use_document_css(ctx); |
198 | |
|
199 | 0 | if (use_doc_css != acc->use_doc_css || doc->css_sum != acc->css_sum) |
200 | 0 | { |
201 | 0 | acc->use_doc_css = use_doc_css; |
202 | 0 | acc->css_sum = doc->css_sum; |
203 | 0 | invalidate_accelerator(ctx, acc); |
204 | 0 | } |
205 | |
|
206 | 0 | if (ch->number < acc->num_chapters && acc->pages_in_chapter[ch->number] != -1) |
207 | 0 | return acc->pages_in_chapter[ch->number]; |
208 | | |
209 | 0 | fz_drop_html(ctx, epub_get_laid_out_html(ctx, doc, ch)); |
210 | 0 | return acc->pages_in_chapter[ch->number]; |
211 | 0 | } |
212 | | |
213 | | static fz_link_dest |
214 | | epub_resolve_link(fz_context *ctx, fz_document *doc_, const char *dest) |
215 | 0 | { |
216 | 0 | epub_document *doc = (epub_document*)doc_; |
217 | 0 | epub_chapter *ch; |
218 | 0 | int i; |
219 | |
|
220 | 0 | const char *s = strchr(dest, '#'); |
221 | 0 | size_t n = s ? (size_t)(s - dest) : strlen(dest); |
222 | 0 | if (s && s[1] == 0) |
223 | 0 | s = NULL; |
224 | |
|
225 | 0 | for (i = 0, ch = doc->spine; ch; ++i, ch = ch->next) |
226 | 0 | { |
227 | 0 | if (!strncmp(ch->path, dest, n) && ch->path[n] == 0) |
228 | 0 | { |
229 | 0 | if (s) |
230 | 0 | { |
231 | 0 | float y; |
232 | 0 | fz_html *html = epub_get_laid_out_html(ctx, doc, ch); |
233 | 0 | int ph = html->page_h; |
234 | | |
235 | | /* Search for a matching fragment */ |
236 | 0 | y = fz_find_html_target(ctx, html, s+1); |
237 | 0 | fz_drop_html(ctx, html); |
238 | 0 | if (y >= 0) |
239 | 0 | { |
240 | 0 | int page = y / ph; |
241 | 0 | return fz_make_link_dest_xyz(i, page, 0, y - page * ph, 0); |
242 | 0 | } |
243 | 0 | return fz_make_link_dest_none(); |
244 | 0 | } |
245 | 0 | return fz_make_link_dest_xyz(i, 0, 0, 0, 0); |
246 | 0 | } |
247 | 0 | } |
248 | | |
249 | 0 | return fz_make_link_dest_none(); |
250 | 0 | } |
251 | | |
252 | | static void |
253 | | epub_layout(fz_context *ctx, fz_document *doc_, float w, float h, float em) |
254 | 0 | { |
255 | 0 | epub_document *doc = (epub_document*)doc_; |
256 | 0 | uint32_t css_sum = user_css_sum(ctx); |
257 | 0 | int use_doc_css = fz_use_document_css(ctx); |
258 | |
|
259 | 0 | if (doc->layout_w == w && doc->layout_h == h && doc->layout_em == em && doc->css_sum == css_sum) |
260 | 0 | return; |
261 | 0 | doc->layout_w = w; |
262 | 0 | doc->layout_h = h; |
263 | 0 | doc->layout_em = em; |
264 | |
|
265 | 0 | if (doc->accel == NULL) |
266 | 0 | return; |
267 | | |
268 | | /* When we load the saved accelerator, doc->accel |
269 | | * can be populated with different values than doc. |
270 | | * This is really useful as doc starts out with the |
271 | | * values being 0. If we've got the right values |
272 | | * already, then don't bin the data! */ |
273 | 0 | if (doc->accel->layout_w == w && |
274 | 0 | doc->accel->layout_h == h && |
275 | 0 | doc->accel->layout_em == em && |
276 | 0 | doc->accel->use_doc_css == use_doc_css && |
277 | 0 | doc->accel->css_sum == css_sum) |
278 | 0 | return; |
279 | | |
280 | 0 | doc->accel->layout_w = w; |
281 | 0 | doc->accel->layout_h = h; |
282 | 0 | doc->accel->layout_em = em; |
283 | 0 | doc->accel->use_doc_css = use_doc_css; |
284 | 0 | doc->accel->css_sum = css_sum; |
285 | 0 | invalidate_accelerator(ctx, doc->accel); |
286 | 0 | } |
287 | | |
288 | | static int |
289 | | epub_count_chapters(fz_context *ctx, fz_document *doc_) |
290 | 0 | { |
291 | 0 | epub_document *doc = (epub_document*)doc_; |
292 | 0 | epub_chapter *ch; |
293 | 0 | int count = 0; |
294 | 0 | for (ch = doc->spine; ch; ch = ch->next) |
295 | 0 | ++count; |
296 | 0 | return count; |
297 | 0 | } |
298 | | |
299 | | static int |
300 | | epub_count_pages(fz_context *ctx, fz_document *doc_, int chapter) |
301 | 0 | { |
302 | 0 | epub_document *doc = (epub_document*)doc_; |
303 | 0 | epub_chapter *ch; |
304 | 0 | int i; |
305 | 0 | for (i = 0, ch = doc->spine; ch; ++i, ch = ch->next) |
306 | 0 | { |
307 | 0 | if (i == chapter) |
308 | 0 | { |
309 | 0 | return count_chapter_pages(ctx, doc, ch); |
310 | 0 | } |
311 | 0 | } |
312 | 0 | return 0; |
313 | 0 | } |
314 | | |
/* Header of a saved accelerator: generic magic, EPUB-specific magic
 * and format version, each read as a little-endian 32-bit value. */
#define MAGIC_ACCELERATOR 0xacce1e7a
#define MAGIC_ACCEL_EPUB 0x62755065
#define ACCEL_VERSION 0x00010001
318 | | |
319 | | static void epub_load_accelerator(fz_context *ctx, epub_document *doc, fz_stream *accel) |
320 | 0 | { |
321 | 0 | int v; |
322 | 0 | float w, h, em; |
323 | 0 | int num_chapters; |
324 | 0 | epub_accelerator *acc = NULL; |
325 | 0 | uint32_t css_sum; |
326 | 0 | int use_doc_css; |
327 | 0 | int make_new = (accel == NULL); |
328 | |
|
329 | 0 | fz_var(acc); |
330 | |
|
331 | 0 | if (accel) |
332 | 0 | { |
333 | | /* Try to read the accelerator data. If we fail silently give up. */ |
334 | 0 | fz_try(ctx) |
335 | 0 | { |
336 | 0 | v = fz_read_int32_le(ctx, accel); |
337 | 0 | if (v != (int32_t)MAGIC_ACCELERATOR) |
338 | 0 | { |
339 | 0 | make_new = 1; |
340 | 0 | break; |
341 | 0 | } |
342 | | |
343 | 0 | v = fz_read_int32_le(ctx, accel); |
344 | 0 | if (v != MAGIC_ACCEL_EPUB) |
345 | 0 | { |
346 | 0 | make_new = 1; |
347 | 0 | break; |
348 | 0 | } |
349 | | |
350 | 0 | v = fz_read_int32_le(ctx, accel); |
351 | 0 | if (v != ACCEL_VERSION) |
352 | 0 | { |
353 | 0 | make_new = 1; |
354 | 0 | break; |
355 | 0 | } |
356 | | |
357 | 0 | w = fz_read_float_le(ctx, accel); |
358 | 0 | h = fz_read_float_le(ctx, accel); |
359 | 0 | em = fz_read_float_le(ctx, accel); |
360 | 0 | css_sum = fz_read_uint32_le(ctx, accel); |
361 | 0 | use_doc_css = fz_read_int32_le(ctx, accel); |
362 | |
|
363 | 0 | num_chapters = fz_read_int32_le(ctx, accel); |
364 | 0 | if (num_chapters <= 0) |
365 | 0 | { |
366 | 0 | make_new = 1; |
367 | 0 | break; |
368 | 0 | } |
369 | | |
370 | 0 | acc = fz_malloc_struct(ctx, epub_accelerator); |
371 | 0 | acc->pages_in_chapter = Memento_label(fz_malloc_array(ctx, num_chapters, int), "accel_pages_in_chapter"); |
372 | 0 | acc->max_chapters = acc->num_chapters = num_chapters; |
373 | 0 | acc->layout_w = w; |
374 | 0 | acc->layout_h = h; |
375 | 0 | acc->layout_em = em; |
376 | 0 | acc->css_sum = css_sum; |
377 | 0 | acc->use_doc_css = use_doc_css; |
378 | |
|
379 | 0 | for (v = 0; v < num_chapters; v++) |
380 | 0 | acc->pages_in_chapter[v] = fz_read_int32_le(ctx, accel); |
381 | 0 | } |
382 | 0 | fz_catch(ctx) |
383 | 0 | { |
384 | 0 | if (acc) |
385 | 0 | fz_free(ctx, acc->pages_in_chapter); |
386 | 0 | fz_free(ctx, acc); |
387 | | /* Swallow the error and run unaccelerated */ |
388 | 0 | fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); |
389 | 0 | fz_report_error(ctx); |
390 | 0 | make_new = 1; |
391 | 0 | } |
392 | 0 | } |
393 | | |
394 | | /* If we aren't given an accelerator to load (or the one we're given |
395 | | * is bad) create a blank stub and we can fill it out as we go. */ |
396 | 0 | if (make_new) |
397 | 0 | { |
398 | 0 | acc = fz_malloc_struct(ctx, epub_accelerator); |
399 | 0 | acc->css_sum = doc->css_sum; |
400 | 0 | acc->use_doc_css = fz_use_document_css(ctx); |
401 | 0 | } |
402 | |
|
403 | 0 | doc->accel = acc; |
404 | 0 | } |
405 | | |
406 | | static void |
407 | | accelerate_chapter(fz_context *ctx, epub_document *doc, epub_chapter *ch, fz_html *html) |
408 | 0 | { |
409 | 0 | epub_accelerator *acc = doc->accel; |
410 | 0 | int p = count_laid_out_pages(html); |
411 | |
|
412 | 0 | if (ch->number < acc->num_chapters) |
413 | 0 | { |
414 | 0 | if (acc->pages_in_chapter[ch->number] != p && acc->pages_in_chapter[ch->number] != -1) |
415 | 0 | { |
416 | 0 | fz_warn(ctx, "Invalidating stale accelerator data."); |
417 | 0 | invalidate_accelerator(ctx, doc->accel); |
418 | 0 | } |
419 | 0 | acc->pages_in_chapter[ch->number] = p; |
420 | 0 | return; |
421 | 0 | } |
422 | | |
423 | 0 | if (ch->number >= acc->max_chapters) |
424 | 0 | { |
425 | 0 | int n = acc->max_chapters; |
426 | 0 | int i; |
427 | 0 | if (n == 0) |
428 | 0 | n = 4; |
429 | 0 | while (n <= ch->number) |
430 | 0 | n *= 2; |
431 | |
|
432 | 0 | acc->pages_in_chapter = fz_realloc_array(ctx, acc->pages_in_chapter, n, int); |
433 | 0 | for (i = acc->max_chapters; i < n; i++) |
434 | 0 | acc->pages_in_chapter[i] = -1; |
435 | 0 | acc->max_chapters = n; |
436 | 0 | } |
437 | 0 | acc->pages_in_chapter[ch->number] = p; |
438 | 0 | if (acc->num_chapters < ch->number+1) |
439 | 0 | acc->num_chapters = ch->number+1; |
440 | 0 | } |
441 | | |
442 | | static void |
443 | | epub_drop_page(fz_context *ctx, fz_page *page_) |
444 | 0 | { |
445 | 0 | epub_page *page = (epub_page *)page_; |
446 | 0 | fz_drop_html(ctx, page->html); |
447 | 0 | } |
448 | | |
449 | | static epub_chapter * |
450 | | epub_load_chapter(fz_context *ctx, epub_document *doc, const char *path, int i) |
451 | 0 | { |
452 | 0 | epub_chapter *ch; |
453 | |
|
454 | 0 | ch = fz_malloc_struct(ctx, epub_chapter); |
455 | 0 | fz_try(ctx) |
456 | 0 | { |
457 | 0 | ch->path = Memento_label(fz_strdup(ctx, path), "chapter_path"); |
458 | 0 | ch->number = i; |
459 | 0 | } |
460 | 0 | fz_catch(ctx) |
461 | 0 | { |
462 | 0 | fz_free(ctx, ch); |
463 | 0 | fz_rethrow(ctx); |
464 | 0 | } |
465 | | |
466 | 0 | return ch; |
467 | 0 | } |
468 | | |
469 | | static fz_html * |
470 | | epub_parse_chapter(fz_context *ctx, epub_document *doc, epub_chapter *ch) |
471 | 0 | { |
472 | 0 | fz_archive *zip = doc->zip; |
473 | 0 | fz_buffer *buf; |
474 | 0 | char base_uri[2048]; |
475 | 0 | fz_html *html; |
476 | | |
477 | | /* Look for one we made earlier */ |
478 | 0 | html = fz_find_html(ctx, doc, ch->number); |
479 | 0 | if (html) |
480 | 0 | return html; |
481 | | |
482 | 0 | fz_dirname(base_uri, ch->path, sizeof base_uri); |
483 | |
|
484 | 0 | buf = fz_read_archive_entry(ctx, zip, ch->path); |
485 | 0 | fz_try(ctx) |
486 | 0 | html = fz_parse_html(ctx, doc->set, zip, base_uri, buf, fz_user_css(ctx), 1, 1, 0); |
487 | 0 | fz_always(ctx) |
488 | 0 | fz_drop_buffer(ctx, buf); |
489 | 0 | fz_catch(ctx) |
490 | 0 | fz_rethrow(ctx); |
491 | | |
492 | 0 | return fz_store_html(ctx, html, doc, ch->number); |
493 | 0 | } |
494 | | |
495 | | static fz_html * |
496 | | epub_get_laid_out_html(fz_context *ctx, epub_document *doc, epub_chapter *ch) |
497 | 0 | { |
498 | 0 | fz_html *html = epub_parse_chapter(ctx, doc, ch); |
499 | 0 | fz_try(ctx) |
500 | 0 | { |
501 | 0 | fz_layout_html(ctx, html, doc->layout_w, doc->layout_h, doc->layout_em); |
502 | 0 | accelerate_chapter(ctx, doc, ch, html); |
503 | 0 | } |
504 | 0 | fz_catch(ctx) |
505 | 0 | { |
506 | 0 | fz_drop_html(ctx, html); |
507 | 0 | fz_rethrow(ctx); |
508 | 0 | } |
509 | | |
510 | 0 | fz_drop_html(ctx, doc->most_recent_html); |
511 | 0 | doc->most_recent_html = fz_keep_html(ctx, html); |
512 | |
|
513 | 0 | return html; |
514 | 0 | } |
515 | | |
516 | | static fz_rect |
517 | | epub_bound_page(fz_context *ctx, fz_page *page_, fz_box_type box) |
518 | 0 | { |
519 | 0 | epub_document *doc = (epub_document*)page_->doc; |
520 | 0 | epub_page *page = (epub_page*)page_; |
521 | 0 | epub_chapter *ch = page->ch; |
522 | 0 | fz_rect bbox; |
523 | 0 | fz_html *html = epub_get_laid_out_html(ctx, doc, ch); |
524 | |
|
525 | 0 | bbox.x0 = 0; |
526 | 0 | bbox.y0 = 0; |
527 | 0 | bbox.x1 = html->page_w + html->page_margin[L] + html->page_margin[R]; |
528 | 0 | bbox.y1 = html->page_h + html->page_margin[T] + html->page_margin[B]; |
529 | 0 | fz_drop_html(ctx, html); |
530 | 0 | return bbox; |
531 | 0 | } |
532 | | |
533 | | static void |
534 | | epub_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, fz_matrix ctm, fz_cookie *cookie) |
535 | 0 | { |
536 | 0 | epub_page *page = (epub_page*)page_; |
537 | |
|
538 | 0 | fz_draw_html(ctx, dev, ctm, page->html, page->number); |
539 | 0 | } |
540 | | |
541 | | static fz_link * |
542 | | epub_load_links(fz_context *ctx, fz_page *page_) |
543 | 0 | { |
544 | 0 | epub_page *page = (epub_page*)page_; |
545 | 0 | epub_chapter *ch = page->ch; |
546 | |
|
547 | 0 | return fz_load_html_links(ctx, page->html, page->number, ch->path); |
548 | 0 | } |
549 | | |
550 | | static fz_bookmark |
551 | | epub_make_bookmark(fz_context *ctx, fz_document *doc_, fz_location loc) |
552 | 0 | { |
553 | 0 | epub_document *doc = (epub_document*)doc_; |
554 | 0 | epub_chapter *ch; |
555 | 0 | int i; |
556 | |
|
557 | 0 | for (i = 0, ch = doc->spine; ch; ++i, ch = ch->next) |
558 | 0 | { |
559 | 0 | if (i == loc.chapter) |
560 | 0 | { |
561 | 0 | fz_html *html = epub_get_laid_out_html(ctx, doc, ch); |
562 | 0 | fz_bookmark mark = fz_make_html_bookmark(ctx, html, loc.page); |
563 | 0 | fz_drop_html(ctx, html); |
564 | 0 | return mark; |
565 | 0 | } |
566 | 0 | } |
567 | | |
568 | 0 | return 0; |
569 | 0 | } |
570 | | |
571 | | static fz_location |
572 | | epub_lookup_bookmark(fz_context *ctx, fz_document *doc_, fz_bookmark mark) |
573 | 0 | { |
574 | 0 | epub_document *doc = (epub_document*)doc_; |
575 | 0 | epub_chapter *ch; |
576 | 0 | int i; |
577 | |
|
578 | 0 | for (i = 0, ch = doc->spine; ch; ++i, ch = ch->next) |
579 | 0 | { |
580 | 0 | fz_html *html = epub_get_laid_out_html(ctx, doc, ch); |
581 | 0 | int p = fz_lookup_html_bookmark(ctx, html, mark); |
582 | 0 | fz_drop_html(ctx, html); |
583 | 0 | if (p != -1) |
584 | 0 | return fz_make_location(i, p); |
585 | 0 | } |
586 | 0 | return fz_make_location(-1, -1); |
587 | 0 | } |
588 | | |
589 | | static fz_page * |
590 | | epub_load_page(fz_context *ctx, fz_document *doc_, int chapter, int number) |
591 | 0 | { |
592 | 0 | epub_document *doc = (epub_document*)doc_; |
593 | 0 | epub_chapter *ch; |
594 | 0 | int i; |
595 | 0 | for (i = 0, ch = doc->spine; ch; ++i, ch = ch->next) |
596 | 0 | { |
597 | 0 | if (i == chapter) |
598 | 0 | { |
599 | 0 | epub_page *page = fz_new_derived_page(ctx, epub_page, doc_); |
600 | 0 | page->super.bound_page = epub_bound_page; |
601 | 0 | page->super.run_page_contents = epub_run_page; |
602 | 0 | page->super.load_links = epub_load_links; |
603 | 0 | page->super.drop_page = epub_drop_page; |
604 | 0 | page->ch = ch; |
605 | 0 | page->number = number; |
606 | 0 | page->html = epub_get_laid_out_html(ctx, doc, ch); |
607 | 0 | return (fz_page*)page; |
608 | 0 | } |
609 | 0 | } |
610 | 0 | return NULL; |
611 | 0 | } |
612 | | |
613 | | static void |
614 | | epub_page_label(fz_context *ctx, fz_document *doc_, int chapter, int number, char *buf, size_t size) |
615 | 0 | { |
616 | 0 | fz_snprintf(buf, size, "ch. %d, p. %d", chapter+1, number+1); |
617 | 0 | } |
618 | | |
619 | | static void |
620 | | epub_drop_accelerator(fz_context *ctx, epub_accelerator *acc) |
621 | 0 | { |
622 | 0 | if (acc == NULL) |
623 | 0 | return; |
624 | | |
625 | 0 | fz_free(ctx, acc->pages_in_chapter); |
626 | 0 | fz_free(ctx, acc); |
627 | 0 | } |
628 | | |
629 | | static void |
630 | | epub_drop_document(fz_context *ctx, fz_document *doc_) |
631 | 0 | { |
632 | 0 | epub_document *doc = (epub_document*)doc_; |
633 | 0 | epub_chapter *ch, *next; |
634 | 0 | ch = doc->spine; |
635 | 0 | while (ch) |
636 | 0 | { |
637 | 0 | next = ch->next; |
638 | 0 | fz_free(ctx, ch->path); |
639 | 0 | fz_free(ctx, ch); |
640 | 0 | ch = next; |
641 | 0 | } |
642 | 0 | epub_drop_accelerator(ctx, doc->accel); |
643 | 0 | fz_drop_archive(ctx, doc->zip); |
644 | 0 | fz_drop_html_font_set(ctx, doc->set); |
645 | 0 | fz_drop_outline(ctx, doc->outline); |
646 | 0 | fz_free(ctx, doc->dc_title); |
647 | 0 | fz_free(ctx, doc->dc_creator); |
648 | 0 | fz_drop_html(ctx, doc->most_recent_html); |
649 | 0 | fz_purge_stored_html(ctx, doc); |
650 | 0 | } |
651 | | |
652 | | static const char * |
653 | | rel_path_from_idref(fz_xml *manifest, const char *idref) |
654 | 0 | { |
655 | 0 | fz_xml *item; |
656 | 0 | if (!idref) |
657 | 0 | return NULL; |
658 | 0 | item = fz_xml_find_down(manifest, "item"); |
659 | 0 | while (item) |
660 | 0 | { |
661 | 0 | const char *id = fz_xml_att(item, "id"); |
662 | 0 | if (id && !strcmp(id, idref)) |
663 | 0 | return fz_xml_att(item, "href"); |
664 | 0 | item = fz_xml_find_next(item, "item"); |
665 | 0 | } |
666 | 0 | return NULL; |
667 | 0 | } |
668 | | |
669 | | static const char * |
670 | | path_from_idref(char *path, fz_xml *manifest, const char *base_uri, const char *idref, int n) |
671 | 0 | { |
672 | 0 | const char *rel_path = rel_path_from_idref(manifest, idref); |
673 | 0 | if (!rel_path) |
674 | 0 | { |
675 | 0 | path[0] = 0; |
676 | 0 | return NULL; |
677 | 0 | } |
678 | 0 | fz_strlcpy(path, base_uri, n); |
679 | 0 | fz_strlcat(path, "/", n); |
680 | 0 | fz_strlcat(path, rel_path, n); |
681 | 0 | return fz_cleanname(fz_urldecode(path)); |
682 | 0 | } |
683 | | |
684 | | static fz_outline * |
685 | | epub_parse_ncx_imp(fz_context *ctx, epub_document *doc, fz_xml *node, char *base_uri) |
686 | 0 | { |
687 | 0 | char path[2048]; |
688 | 0 | fz_outline *outline, *head, **tailp; |
689 | |
|
690 | 0 | head = NULL; |
691 | 0 | tailp = &head; |
692 | |
|
693 | 0 | node = fz_xml_find_down(node, "navPoint"); |
694 | 0 | while (node) |
695 | 0 | { |
696 | 0 | char *text = fz_xml_text(fz_xml_down(fz_xml_find_down(fz_xml_find_down(node, "navLabel"), "text"))); |
697 | 0 | char *content = fz_xml_att(fz_xml_find_down(node, "content"), "src"); |
698 | 0 | if (text && content) |
699 | 0 | { |
700 | 0 | fz_strlcpy(path, base_uri, sizeof path); |
701 | 0 | fz_strlcat(path, "/", sizeof path); |
702 | 0 | fz_strlcat(path, content, sizeof path); |
703 | 0 | fz_urldecode(path); |
704 | 0 | fz_cleanname(path); |
705 | |
|
706 | 0 | fz_try(ctx) |
707 | 0 | { |
708 | 0 | *tailp = outline = fz_new_outline(ctx); |
709 | 0 | tailp = &(*tailp)->next; |
710 | 0 | outline->title = Memento_label(fz_strdup(ctx, text), "outline_title"); |
711 | 0 | outline->uri = Memento_label(fz_strdup(ctx, path), "outline_uri"); |
712 | 0 | outline->page = fz_make_location(-1, -1); |
713 | 0 | outline->down = epub_parse_ncx_imp(ctx, doc, node, base_uri); |
714 | 0 | outline->is_open = 1; |
715 | 0 | } |
716 | 0 | fz_catch(ctx) |
717 | 0 | { |
718 | 0 | fz_drop_outline(ctx, head); |
719 | 0 | fz_rethrow(ctx); |
720 | 0 | } |
721 | 0 | } |
722 | 0 | node = fz_xml_find_next(node, "navPoint"); |
723 | 0 | } |
724 | | |
725 | 0 | return head; |
726 | 0 | } |
727 | | |
728 | | static void |
729 | | epub_parse_ncx(fz_context *ctx, epub_document *doc, const char *path) |
730 | 0 | { |
731 | 0 | fz_archive *zip = doc->zip; |
732 | 0 | fz_buffer *buf = NULL; |
733 | 0 | fz_xml_doc *ncx = NULL; |
734 | 0 | char base_uri[2048]; |
735 | |
|
736 | 0 | fz_var(buf); |
737 | 0 | fz_var(ncx); |
738 | |
|
739 | 0 | fz_try(ctx) |
740 | 0 | { |
741 | 0 | fz_dirname(base_uri, path, sizeof base_uri); |
742 | 0 | buf = fz_read_archive_entry(ctx, zip, path); |
743 | 0 | ncx = fz_parse_xml(ctx, buf, 0); |
744 | 0 | doc->outline = epub_parse_ncx_imp(ctx, doc, fz_xml_find_down(fz_xml_root(ncx), "navMap"), base_uri); |
745 | 0 | } |
746 | 0 | fz_always(ctx) |
747 | 0 | { |
748 | 0 | fz_drop_buffer(ctx, buf); |
749 | 0 | fz_drop_xml(ctx, ncx); |
750 | 0 | } |
751 | 0 | fz_catch(ctx) |
752 | 0 | fz_rethrow(ctx); |
753 | 0 | } |
754 | | |
755 | | static char * |
756 | | find_metadata(fz_context *ctx, fz_xml *metadata, char *key) |
757 | 0 | { |
758 | 0 | char *text = fz_xml_text(fz_xml_down(fz_xml_find_down(metadata, key))); |
759 | 0 | if (text) |
760 | 0 | return fz_strdup(ctx, text); |
761 | 0 | return NULL; |
762 | 0 | } |
763 | | |
764 | | static fz_buffer * |
765 | | read_container_and_prefix(fz_context *ctx, fz_archive *zip, char *prefix, size_t prefix_len) |
766 | 0 | { |
767 | 0 | int n = fz_count_archive_entries(ctx, zip); |
768 | 0 | int i; |
769 | |
|
770 | 0 | prefix[0] = 0; |
771 | | |
772 | | /* First off, look for the container.xml at the top level. */ |
773 | 0 | for (i = 0; i < n; i++) |
774 | 0 | { |
775 | 0 | const char *p = fz_list_archive_entry(ctx, zip, i); |
776 | |
|
777 | 0 | if (!strcmp(p, "META-INF/container.xml")) |
778 | 0 | return fz_read_archive_entry(ctx, zip, "META-INF/container.xml"); |
779 | 0 | } |
780 | | |
781 | | /* If that failed, look for the first such file in a subdirectory. */ |
782 | 0 | for (i = 0; i < n; i++) |
783 | 0 | { |
784 | 0 | const char *p = fz_list_archive_entry(ctx, zip, i); |
785 | 0 | size_t z = strlen(p); |
786 | 0 | size_t z0 = sizeof("META-INF/container.xml")-1; |
787 | |
|
788 | 0 | if (z < z0) |
789 | 0 | continue; |
790 | 0 | if (!strcmp(p + z - z0, "META-INF/container.xml")) |
791 | 0 | { |
792 | 0 | if (z - z0 >= prefix_len) |
793 | 0 | { |
794 | 0 | fz_warn(ctx, "Ignoring %s as path too long.", p); |
795 | 0 | continue; |
796 | 0 | } |
797 | 0 | memcpy(prefix, p, z-z0); |
798 | 0 | prefix[z-z0] = 0; |
799 | 0 | return fz_read_archive_entry(ctx, zip, p); |
800 | 0 | } |
801 | 0 | } |
802 | | |
803 | 0 | return fz_read_archive_entry(ctx, zip, "META-INF/container.xml"); |
804 | 0 | } |
805 | | |
806 | | static void |
807 | | epub_parse_header(fz_context *ctx, epub_document *doc) |
808 | 0 | { |
809 | 0 | fz_archive *zip = doc->zip; |
810 | 0 | fz_buffer *buf = NULL; |
811 | 0 | fz_xml_doc *encryption_xml = NULL; |
812 | 0 | fz_xml_doc *container_xml = NULL; |
813 | 0 | fz_xml_doc *content_opf = NULL; |
814 | 0 | fz_xml *container, *rootfiles, *rootfile; |
815 | 0 | fz_xml *package, *manifest, *spine, *itemref, *metadata; |
816 | 0 | char base_uri[2048]; |
817 | 0 | const char *full_path; |
818 | 0 | const char *version; |
819 | 0 | char ncx[2048], s[2048]; |
820 | 0 | char *prefixed_full_path = NULL; |
821 | 0 | size_t prefix_len; |
822 | 0 | epub_chapter **tailp; |
823 | 0 | int i; |
824 | |
|
825 | 0 | fz_var(buf); |
826 | 0 | fz_var(encryption_xml); |
827 | 0 | fz_var(container_xml); |
828 | 0 | fz_var(content_opf); |
829 | 0 | fz_var(prefixed_full_path); |
830 | |
|
831 | 0 | fz_try(ctx) |
832 | 0 | { |
833 | | /* parse META-INF/encryption.xml to figure out which entries are encrypted */ |
834 | | |
835 | | /* parse META-INF/container.xml to find OPF */ |
836 | | /* Reuse base_uri to read the prefix. */ |
837 | 0 | buf = read_container_and_prefix(ctx, zip, base_uri, sizeof(base_uri)); |
838 | 0 | container_xml = fz_parse_xml(ctx, buf, 0); |
839 | 0 | fz_drop_buffer(ctx, buf); |
840 | 0 | buf = NULL; |
841 | | |
842 | | /* Some epub files can be prefixed by a directory name. This (normally |
843 | | * empty!) will be in base_uri. */ |
844 | 0 | prefix_len = strlen(base_uri); |
845 | 0 | { |
846 | | /* Further abuse base_uri to hold a temporary name. */ |
847 | 0 | const size_t z0 = sizeof("META-INF/encryption.xml")-1; |
848 | 0 | if (sizeof(base_uri) <= prefix_len + z0) |
849 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "Prefix too long in epub"); |
850 | 0 | strcpy(base_uri + prefix_len, "META-INF/encryption.xml"); |
851 | 0 | if (fz_has_archive_entry(ctx, zip, base_uri)) |
852 | 0 | { |
853 | 0 | fz_warn(ctx, "EPUB may be locked by DRM"); |
854 | |
|
855 | 0 | buf = fz_read_archive_entry(ctx, zip, base_uri); |
856 | 0 | encryption_xml = fz_parse_xml(ctx, buf, 0); |
857 | 0 | fz_drop_buffer(ctx, buf); |
858 | 0 | buf = NULL; |
859 | |
|
860 | 0 | epub_parse_encryption(ctx, doc, fz_xml_find(fz_xml_root(encryption_xml), "encryption")); |
861 | 0 | zip = doc->zip; |
862 | 0 | } |
863 | 0 | } |
864 | | |
865 | 0 | container = fz_xml_find(fz_xml_root(container_xml), "container"); |
866 | 0 | rootfiles = fz_xml_find_down(container, "rootfiles"); |
867 | 0 | rootfile = fz_xml_find_down(rootfiles, "rootfile"); |
868 | 0 | full_path = fz_xml_att(rootfile, "full-path"); |
869 | 0 | if (!full_path) |
870 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find root file in EPUB"); |
871 | | |
872 | 0 | fz_dirname(base_uri+prefix_len, full_path, sizeof(base_uri) - prefix_len); |
873 | |
|
874 | 0 | prefixed_full_path = fz_malloc(ctx, strlen(full_path) + prefix_len + 1); |
875 | 0 | memcpy(prefixed_full_path, base_uri, prefix_len); |
876 | 0 | strcpy(prefixed_full_path + prefix_len, full_path); |
877 | | |
878 | | /* parse OPF to find NCX and spine */ |
879 | |
|
880 | 0 | buf = fz_read_archive_entry(ctx, zip, prefixed_full_path); |
881 | 0 | content_opf = fz_parse_xml(ctx, buf, 0); |
882 | 0 | fz_drop_buffer(ctx, buf); |
883 | 0 | buf = NULL; |
884 | |
|
885 | 0 | package = fz_xml_find(fz_xml_root(content_opf), "package"); |
886 | 0 | version = fz_xml_att(package, "version"); |
887 | 0 | if (!version || strcmp(version, "2.0")) |
888 | 0 | fz_warn(ctx, "unknown epub version: %s", version ? version : "<none>"); |
889 | |
|
890 | 0 | metadata = fz_xml_find_down(package, "metadata"); |
891 | 0 | if (metadata) |
892 | 0 | { |
893 | 0 | doc->dc_title = Memento_label(find_metadata(ctx, metadata, "title"), "epub_title"); |
894 | 0 | doc->dc_creator = Memento_label(find_metadata(ctx, metadata, "creator"), "epub_creator"); |
895 | 0 | } |
896 | |
|
897 | 0 | manifest = fz_xml_find_down(package, "manifest"); |
898 | 0 | spine = fz_xml_find_down(package, "spine"); |
899 | |
|
900 | 0 | if (path_from_idref(ncx, manifest, base_uri, fz_xml_att(spine, "toc"), sizeof ncx)) |
901 | 0 | { |
902 | 0 | epub_parse_ncx(ctx, doc, ncx); |
903 | 0 | } |
904 | |
|
905 | 0 | doc->spine = NULL; |
906 | 0 | tailp = &doc->spine; |
907 | 0 | itemref = fz_xml_find_down(spine, "itemref"); |
908 | 0 | i = 0; |
909 | 0 | while (itemref) |
910 | 0 | { |
911 | 0 | if (path_from_idref(s, manifest, base_uri, fz_xml_att(itemref, "idref"), sizeof s)) |
912 | 0 | { |
913 | 0 | fz_try(ctx) |
914 | 0 | { |
915 | 0 | *tailp = epub_load_chapter(ctx, doc, s, i); |
916 | 0 | tailp = &(*tailp)->next; |
917 | 0 | i++; |
918 | 0 | } |
919 | 0 | fz_catch(ctx) |
920 | 0 | { |
921 | 0 | fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); |
922 | 0 | fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); |
923 | 0 | fz_report_error(ctx); |
924 | 0 | fz_warn(ctx, "ignoring chapter %s", s); |
925 | 0 | } |
926 | 0 | } |
927 | 0 | itemref = fz_xml_find_next(itemref, "itemref"); |
928 | 0 | } |
929 | 0 | } |
930 | 0 | fz_always(ctx) |
931 | 0 | { |
932 | 0 | fz_drop_xml(ctx, content_opf); |
933 | 0 | fz_drop_xml(ctx, container_xml); |
934 | 0 | fz_drop_xml(ctx, encryption_xml); |
935 | 0 | fz_drop_buffer(ctx, buf); |
936 | 0 | fz_free(ctx, prefixed_full_path); |
937 | 0 | } |
938 | 0 | fz_catch(ctx) |
939 | 0 | fz_rethrow(ctx); |
940 | 0 | } |
941 | | |
942 | | static fz_outline * |
943 | | epub_load_outline(fz_context *ctx, fz_document *doc_) |
944 | 0 | { |
945 | 0 | epub_document *doc = (epub_document*)doc_; |
946 | 0 | return fz_keep_outline(ctx, doc->outline); |
947 | 0 | } |
948 | | |
949 | | static int |
950 | | epub_lookup_metadata(fz_context *ctx, fz_document *doc_, const char *key, char *buf, size_t size) |
951 | 0 | { |
952 | 0 | epub_document *doc = (epub_document*)doc_; |
953 | 0 | if (!strcmp(key, FZ_META_FORMAT)) |
954 | 0 | return 1 + (int)fz_strlcpy(buf, "EPUB", size); |
955 | 0 | if (!strcmp(key, FZ_META_INFO_TITLE) && doc->dc_title) |
956 | 0 | return 1 + (int)fz_strlcpy(buf, doc->dc_title, size); |
957 | 0 | if (!strcmp(key, FZ_META_INFO_AUTHOR) && doc->dc_creator) |
958 | 0 | return 1 + (int)fz_strlcpy(buf, doc->dc_creator, size); |
959 | 0 | return -1; |
960 | 0 | } |
961 | | |
962 | | static void |
963 | | epub_output_accelerator(fz_context *ctx, fz_document *doc_, fz_output *out) |
964 | 0 | { |
965 | 0 | epub_document *doc = (epub_document*)doc_; |
966 | 0 | int i; |
967 | |
|
968 | 0 | fz_try(ctx) |
969 | 0 | { |
970 | 0 | if (doc->accel == NULL) |
971 | 0 | fz_throw(ctx, FZ_ERROR_ARGUMENT, "No accelerator data to write"); |
972 | | |
973 | 0 | fz_write_int32_le(ctx, out, MAGIC_ACCELERATOR); |
974 | 0 | fz_write_int32_le(ctx, out, MAGIC_ACCEL_EPUB); |
975 | 0 | fz_write_int32_le(ctx, out, ACCEL_VERSION); |
976 | 0 | fz_write_float_le(ctx, out, doc->accel->layout_w); |
977 | 0 | fz_write_float_le(ctx, out, doc->accel->layout_h); |
978 | 0 | fz_write_float_le(ctx, out, doc->accel->layout_em); |
979 | 0 | fz_write_uint32_le(ctx, out, doc->accel->css_sum); |
980 | 0 | fz_write_int32_le(ctx, out, doc->accel->use_doc_css); |
981 | 0 | fz_write_int32_le(ctx, out, doc->accel->num_chapters); |
982 | 0 | for (i = 0; i < doc->accel->num_chapters; i++) |
983 | 0 | fz_write_int32_le(ctx, out, doc->accel->pages_in_chapter[i]); |
984 | |
|
985 | 0 | fz_close_output(ctx, out); |
986 | 0 | } |
987 | 0 | fz_always(ctx) |
988 | 0 | fz_drop_output(ctx, out); |
989 | 0 | fz_catch(ctx) |
990 | 0 | fz_rethrow(ctx); |
991 | 0 | } |
992 | | |
993 | | /* Takes ownership of zip. Will always eventually drop it. |
994 | | * Never takes ownership of accel. */ |
995 | | static fz_document * |
996 | | epub_init(fz_context *ctx, fz_archive *zip, fz_stream *accel) |
997 | 0 | { |
998 | 0 | epub_document *doc = NULL; |
999 | |
|
1000 | 0 | fz_var(doc); |
1001 | 0 | fz_var(zip); |
1002 | |
|
1003 | 0 | fz_try(ctx) |
1004 | 0 | { |
1005 | 0 | doc = fz_new_derived_document(ctx, epub_document); |
1006 | 0 | doc->zip = zip; |
1007 | 0 | zip = NULL; |
1008 | |
|
1009 | 0 | doc->super.drop_document = epub_drop_document; |
1010 | 0 | doc->super.layout = epub_layout; |
1011 | 0 | doc->super.load_outline = epub_load_outline; |
1012 | 0 | doc->super.resolve_link_dest = epub_resolve_link; |
1013 | 0 | doc->super.make_bookmark = epub_make_bookmark; |
1014 | 0 | doc->super.lookup_bookmark = epub_lookup_bookmark; |
1015 | 0 | doc->super.count_chapters = epub_count_chapters; |
1016 | 0 | doc->super.count_pages = epub_count_pages; |
1017 | 0 | doc->super.load_page = epub_load_page; |
1018 | 0 | doc->super.page_label = epub_page_label; |
1019 | 0 | doc->super.lookup_metadata = epub_lookup_metadata; |
1020 | 0 | doc->super.output_accelerator = epub_output_accelerator; |
1021 | 0 | doc->super.is_reflowable = 1; |
1022 | |
|
1023 | 0 | doc->set = fz_new_html_font_set(ctx); |
1024 | 0 | doc->css_sum = user_css_sum(ctx); |
1025 | 0 | epub_load_accelerator(ctx, doc, accel); |
1026 | 0 | epub_parse_header(ctx, doc); |
1027 | 0 | } |
1028 | 0 | fz_catch(ctx) |
1029 | 0 | { |
1030 | 0 | fz_drop_archive(ctx, zip); |
1031 | 0 | fz_drop_document(ctx, &doc->super); |
1032 | 0 | fz_rethrow(ctx); |
1033 | 0 | } |
1034 | | |
1035 | 0 | return (fz_document*)doc; |
1036 | 0 | } |
1037 | | |
1038 | | static fz_document * |
1039 | | epub_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir) |
1040 | 0 | { |
1041 | 0 | fz_stream *file2 = NULL; |
1042 | 0 | fz_document *doc; |
1043 | 0 | fz_archive *zip = NULL; |
1044 | |
|
1045 | 0 | if (file == NULL) |
1046 | 0 | { |
1047 | | /* Directory case: file == NULL and dir == the directory. */ |
1048 | 0 | if (fz_has_archive_entry(ctx, dir, "META-INF/container.xml")) |
1049 | 0 | file2 = file = fz_open_archive_entry(ctx, dir, "META-INF/container.xml"); |
1050 | 0 | else |
1051 | 0 | file2 = file = fz_open_archive_entry(ctx, dir, "META-INF\\container.xml"); |
1052 | 0 | if (file == NULL) |
1053 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "Not an epub file"); |
1054 | 0 | zip = fz_keep_archive(ctx, dir); |
1055 | 0 | } |
1056 | 0 | else |
1057 | 0 | { |
1058 | | /* File case: file != NULL and dir can be ignored. */ |
1059 | 0 | zip = fz_open_archive_with_stream(ctx, file); |
1060 | 0 | } |
1061 | | |
1062 | | |
1063 | 0 | fz_try(ctx) |
1064 | 0 | doc = epub_init(ctx, zip, file); |
1065 | 0 | fz_always(ctx) |
1066 | 0 | fz_drop_stream(ctx, file2); |
1067 | 0 | fz_catch(ctx) |
1068 | 0 | fz_rethrow(ctx); |
1069 | | |
1070 | 0 | return doc; |
1071 | 0 | } |
1072 | | |
1073 | | static int |
1074 | | epub_recognize(fz_context *doc, const fz_document_handler *handler, const char *magic) |
1075 | 2.11k | { |
1076 | 2.11k | if (strstr(magic, "META-INF/container.xml") || strstr(magic, "META-INF\\container.xml")) |
1077 | 0 | return 200; |
1078 | 2.11k | return 0; |
1079 | 2.11k | } |
1080 | | |
1081 | | static int |
1082 | | epub_recognize_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir) |
1083 | 8.83k | { |
1084 | 8.83k | fz_archive *arch = NULL; |
1085 | 8.83k | int ret = 0; |
1086 | | |
1087 | 8.83k | fz_var(arch); |
1088 | 8.83k | fz_var(ret); |
1089 | | |
1090 | 17.6k | fz_try(ctx) |
1091 | 17.6k | { |
1092 | 8.83k | if (stream == NULL) |
1093 | 0 | arch = fz_keep_archive(ctx, dir); |
1094 | 8.83k | else |
1095 | 8.83k | { |
1096 | 8.83k | arch = fz_try_open_archive_with_stream(ctx, stream); |
1097 | 8.83k | if (arch == NULL) |
1098 | 8.68k | break; |
1099 | 8.83k | } |
1100 | | |
1101 | 152 | if (fz_has_archive_entry(ctx, arch, "META-INF/container.xml") || |
1102 | 152 | fz_has_archive_entry(ctx, arch, "META-INF\\container.xml")) |
1103 | 0 | ret = 100; |
1104 | 152 | } |
1105 | 17.6k | fz_always(ctx) |
1106 | 8.83k | fz_drop_archive(ctx, arch); |
1107 | 8.83k | fz_catch(ctx) |
1108 | 101 | fz_rethrow(ctx); |
1109 | | |
1110 | 8.73k | return ret; |
1111 | 8.83k | } |
1112 | | |
/* NULL-terminated list of file extensions listed for the EPUB handler. */
static const char *epub_extensions[] = { "epub", NULL };
1118 | | |
/* NULL-terminated list of MIME types listed for the EPUB handler. */
static const char *epub_mimetypes[] = { "application/epub+zip", NULL };
1124 | | |
1125 | | fz_document_handler epub_document_handler = |
1126 | | { |
1127 | | epub_recognize, |
1128 | | epub_open_document, |
1129 | | epub_extensions, |
1130 | | epub_mimetypes, |
1131 | | epub_recognize_content |
1132 | | }; |