/src/mupdf/source/cbz/mucbz.c
Line | Count | Source |
1 | | // Copyright (C) 2004-2026 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | |
25 | | #include <string.h> |
26 | | #include <stdlib.h> |
27 | | |
/* Numerator used when converting image pixel sizes to page units: the page
 * code below computes `pixels * DPI / resolution` for each axis. */
#define DPI 72.0f
29 | | |
/* File name extensions (leading dot included) of archive entries that are
 * accepted as pages. Callers compare with fz_strcasecmp, so matching is
 * case-insensitive. The list is terminated by a NULL entry. */
static const char *cbz_ext_list[] = {
	".bmp",
	".gif",
	".hdp",
	".j2k",
	".jb2",
	".jbig2",
	".jp2",
	".jpeg",
	".jpg",
	".jpx",
	".jxr",
	".pam",
	".pbm",
	".pgm",
	".pkm",
	".png",
	".pnm",
	".ppm",
	".tif",
	".tiff",
	".wdp",
	NULL
};
54 | | |
/* One page of a comic book archive: the base page plus the single image
 * that is drawn on it. */
typedef struct
{
	fz_page super; /* base object; cbz_page pointers are cast to/from fz_page */
	fz_image *image; /* image shown on the page; tested for NULL before use */
} cbz_page;
60 | | |
/* A comic book document: an archive plus the list of entry names that were
 * recognised as page images. */
typedef struct
{
	fz_document super; /* base object; cbz_document pointers are cast to/from fz_document */
	fz_archive *arch; /* archive the pages are read from; dropped with the document */
	int page_count; /* number of valid entries in `page` */
	const char **page; /* entry names of the pages, in display order; the array is freed with the document */
} cbz_document;
68 | | |
/* Locale-independent test for an ASCII decimal digit. */
static inline int cbz_isdigit(int c)
{
	return c >= '0' && c <= '9';
}

/* Locale-independent ASCII upper-casing; other values pass through unchanged. */
static inline int cbz_toupper(int c)
{
	if (c >= 'a' && c <= 'z')
		return c - 'a' + 'A';
	return c;
}

/*
 * "Natural" string comparison used to order page names.
 *
 * Runs of decimal digits that start at the same point in both strings are
 * compared by numeric value (so "page2" sorts before "page10", and "007"
 * equals "7"); all other characters are compared after ASCII upper-casing.
 *
 * Digit runs are compared textually (significant length first, then digit
 * by digit) instead of being accumulated into an int, so arbitrarily long
 * runs in archive entry names cannot overflow.
 *
 * Returns -1, 0 or 1 when a orders before, equal to, or after b.
 */
static inline int
cbz_strnatcmp(const char *a, const char *b)
{
	while (*a || *b)
	{
		if (cbz_isdigit(*a) && cbz_isdigit(*b))
		{
			const char *da, *db;
			size_t na, nb;
			int c;

			/* Leading zeros do not affect the numeric value. */
			while (*a == '0')
				a++;
			while (*b == '0')
				b++;

			da = a;
			db = b;
			while (cbz_isdigit(*a))
				a++;
			while (cbz_isdigit(*b))
				b++;
			na = (size_t)(a - da);
			nb = (size_t)(b - db);

			/* More significant digits means a larger number. */
			if (na != nb)
				return na < nb ? -1 : 1;

			/* Same length: digit-wise order is numeric order. */
			c = memcmp(da, db, na);
			if (c != 0)
				return c < 0 ? -1 : 1;
		}
		else
		{
			int x = cbz_toupper(*a++);
			int y = cbz_toupper(*b++);

			if (x < y)
				return -1;
			if (x > y)
				return 1;
		}
	}

	return 0;
}
110 | | |
/* qsort comparator: each argument points at an element of the page name
 * array (a `const char *`); the names are ordered with cbz_strnatcmp. */
static int
cbz_compare_page_names(const void *a, const void *b)
{
	const char *lhs = *(const char **)a;
	const char *rhs = *(const char **)b;

	return cbz_strnatcmp(lhs, rhs);
}
116 | | |
117 | | static void |
118 | | cbz_create_page_list(fz_context *ctx, cbz_document *doc) |
119 | 0 | { |
120 | 0 | fz_archive *arch = doc->arch; |
121 | 0 | int i, k, count; |
122 | |
|
123 | 0 | count = fz_count_archive_entries(ctx, arch); |
124 | |
|
125 | 0 | doc->page_count = 0; |
126 | 0 | doc->page = fz_malloc_array(ctx, count, const char *); |
127 | |
|
128 | 0 | for (i = 0; i < count; i++) |
129 | 0 | { |
130 | 0 | const char *name = fz_list_archive_entry(ctx, arch, i); |
131 | 0 | const char *ext = name ? strrchr(name, '.') : NULL; |
132 | 0 | for (k = 0; cbz_ext_list[k]; k++) |
133 | 0 | { |
134 | 0 | if (ext && !fz_strcasecmp(ext, cbz_ext_list[k])) |
135 | 0 | { |
136 | 0 | doc->page[doc->page_count++] = name; |
137 | 0 | break; |
138 | 0 | } |
139 | 0 | } |
140 | 0 | } |
141 | |
|
142 | 0 | qsort((char **)doc->page, doc->page_count, sizeof *doc->page, cbz_compare_page_names); |
143 | 0 | } |
144 | | |
145 | | static void |
146 | | cbz_drop_document(fz_context *ctx, fz_document *doc_) |
147 | 0 | { |
148 | 0 | cbz_document *doc = (cbz_document*)doc_; |
149 | 0 | fz_drop_archive(ctx, doc->arch); |
150 | 0 | fz_free(ctx, (char **)doc->page); |
151 | 0 | } |
152 | | |
153 | | static int |
154 | | cbz_count_pages(fz_context *ctx, fz_document *doc_, int chapter) |
155 | 0 | { |
156 | 0 | cbz_document *doc = (cbz_document*)doc_; |
157 | 0 | return doc->page_count; |
158 | 0 | } |
159 | | |
160 | | static fz_rect |
161 | | cbz_bound_page(fz_context *ctx, fz_page *page_, fz_box_type box) |
162 | 0 | { |
163 | 0 | cbz_page *page = (cbz_page*)page_; |
164 | 0 | fz_image *image = page->image; |
165 | 0 | int xres, yres; |
166 | 0 | fz_rect bbox = fz_empty_rect; |
167 | 0 | uint8_t orientation; |
168 | |
|
169 | 0 | if (image) |
170 | 0 | { |
171 | 0 | fz_image_resolution(image, &xres, &yres); |
172 | 0 | bbox.x0 = bbox.y0 = 0; |
173 | 0 | orientation = fz_image_orientation(ctx, image); |
174 | 0 | if (orientation == 0 || (orientation & 1) == 1) |
175 | 0 | { |
176 | 0 | bbox.x1 = image->w * DPI / xres; |
177 | 0 | bbox.y1 = image->h * DPI / yres; |
178 | 0 | } |
179 | 0 | else |
180 | 0 | { |
181 | 0 | bbox.y1 = image->w * DPI / xres; |
182 | 0 | bbox.x1 = image->h * DPI / yres; |
183 | 0 | } |
184 | 0 | } |
185 | 0 | return bbox; |
186 | 0 | } |
187 | | |
188 | | static void |
189 | | cbz_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, fz_matrix ctm, fz_cookie *cookie) |
190 | 0 | { |
191 | 0 | cbz_page *page = (cbz_page*)page_; |
192 | 0 | fz_image *image = page->image; |
193 | 0 | int xres, yres; |
194 | 0 | float w, h; |
195 | 0 | uint8_t orientation; |
196 | 0 | fz_matrix immat; |
197 | |
|
198 | 0 | if (image) |
199 | 0 | { |
200 | 0 | fz_try(ctx) |
201 | 0 | { |
202 | 0 | fz_image_resolution(image, &xres, &yres); |
203 | 0 | orientation = fz_image_orientation(ctx, image); |
204 | 0 | if (orientation == 0 || (orientation & 1) == 1) |
205 | 0 | { |
206 | 0 | w = image->w * DPI / xres; |
207 | 0 | h = image->h * DPI / yres; |
208 | 0 | } |
209 | 0 | else |
210 | 0 | { |
211 | 0 | h = image->w * DPI / xres; |
212 | 0 | w = image->h * DPI / yres; |
213 | 0 | } |
214 | 0 | immat = fz_image_orientation_matrix(ctx, image); |
215 | 0 | immat = fz_post_scale(immat, w, h); |
216 | 0 | ctm = fz_concat(immat, ctm); |
217 | 0 | fz_fill_image(ctx, dev, image, ctm, 1, fz_default_color_params); |
218 | 0 | } |
219 | 0 | fz_catch(ctx) |
220 | 0 | { |
221 | 0 | fz_report_error(ctx); |
222 | 0 | fz_warn(ctx, "cannot render image on page"); |
223 | 0 | } |
224 | 0 | } |
225 | 0 | } |
226 | | |
227 | | static void |
228 | | cbz_drop_page(fz_context *ctx, fz_page *page_) |
229 | 0 | { |
230 | 0 | cbz_page *page = (cbz_page*)page_; |
231 | 0 | fz_drop_image(ctx, page->image); |
232 | 0 | } |
233 | | |
234 | | static fz_page * |
235 | | cbz_load_page(fz_context *ctx, fz_document *doc_, int chapter, int number) |
236 | 0 | { |
237 | 0 | cbz_document *doc = (cbz_document*)doc_; |
238 | 0 | cbz_page *page = NULL; |
239 | 0 | fz_buffer *buf = NULL; |
240 | |
|
241 | 0 | if (number < 0 || number >= doc->page_count) |
242 | 0 | fz_throw(ctx, FZ_ERROR_ARGUMENT, "invalid page number %d", number); |
243 | | |
244 | 0 | fz_var(page); |
245 | 0 | fz_var(buf); |
246 | |
|
247 | 0 | page = fz_new_derived_page(ctx, cbz_page, doc_); |
248 | 0 | page->super.bound_page = cbz_bound_page; |
249 | 0 | page->super.run_page_contents = cbz_run_page; |
250 | 0 | page->super.drop_page = cbz_drop_page; |
251 | |
|
252 | 0 | fz_try(ctx) |
253 | 0 | { |
254 | 0 | buf = fz_read_archive_entry(ctx, doc->arch, doc->page[number]); |
255 | 0 | page->image = fz_new_image_from_buffer(ctx, buf); |
256 | 0 | } |
257 | 0 | fz_always(ctx) |
258 | 0 | { |
259 | 0 | fz_drop_buffer(ctx, buf); |
260 | 0 | } |
261 | 0 | fz_catch(ctx) |
262 | 0 | { |
263 | 0 | fz_report_error(ctx); |
264 | 0 | fz_warn(ctx, "cannot decode image on page, leaving it blank"); |
265 | 0 | } |
266 | |
|
267 | 0 | return (fz_page*)page; |
268 | 0 | } |
269 | | |
270 | | static int |
271 | | cbz_lookup_metadata(fz_context *ctx, fz_document *doc_, const char *key, char *buf, size_t size) |
272 | 0 | { |
273 | 0 | cbz_document *doc = (cbz_document*)doc_; |
274 | 0 | if (!strcmp(key, FZ_META_FORMAT)) |
275 | 0 | return 1 + (int) fz_strlcpy(buf, fz_archive_format(ctx, doc->arch), size); |
276 | 0 | return -1; |
277 | 0 | } |
278 | | |
279 | | static fz_document * |
280 | | cbz_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state) |
281 | 0 | { |
282 | 0 | cbz_document *doc = fz_new_derived_document(ctx, cbz_document); |
283 | |
|
284 | 0 | doc->super.drop_document = cbz_drop_document; |
285 | 0 | doc->super.count_pages = cbz_count_pages; |
286 | 0 | doc->super.load_page = cbz_load_page; |
287 | 0 | doc->super.lookup_metadata = cbz_lookup_metadata; |
288 | |
|
289 | 0 | fz_try(ctx) |
290 | 0 | { |
291 | 0 | if (file) |
292 | 0 | doc->arch = fz_open_archive_with_stream(ctx, file); |
293 | 0 | else |
294 | 0 | doc->arch = fz_keep_archive(ctx, dir); |
295 | 0 | cbz_create_page_list(ctx, doc); |
296 | 0 | } |
297 | 0 | fz_catch(ctx) |
298 | 0 | { |
299 | 0 | fz_drop_document(ctx, (fz_document*)doc); |
300 | 0 | fz_rethrow(ctx); |
301 | 0 | } |
302 | 0 | return (fz_document*)doc; |
303 | 0 | } |
304 | | |
/* NULL-terminated list of document file extensions registered for this
 * handler. "cbr" is only listed when built with HAVE_LIBARCHIVE. */
static const char *cbz_extensions[] =
{
#ifdef HAVE_LIBARCHIVE
	"cbr",
#endif
	"cbt",
	"cbz",
	"tar",
	"zip",
	NULL
};
316 | | |
/* NULL-terminated list of MIME types registered for this handler. The two
 * RAR-based comic book types are only listed when built with
 * HAVE_LIBARCHIVE. */
static const char *cbz_mimetypes[] =
{
#ifdef HAVE_LIBARCHIVE
	"application/vnd.comicbook-rar",
#endif
	"application/vnd.comicbook+zip",
#ifdef HAVE_LIBARCHIVE
	"application/x-cbr",
#endif
	"application/x-cbt",
	"application/x-cbz",
	"application/x-tar",
	"application/zip",
	NULL
};
332 | | |
333 | | static int |
334 | | cbz_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **freestate) |
335 | 69 | { |
336 | 69 | fz_archive *arch = NULL; |
337 | 69 | int ret = 0; |
338 | 69 | int i, k, count; |
339 | | |
340 | 69 | fz_var(arch); |
341 | 69 | fz_var(ret); |
342 | | |
343 | 138 | fz_try(ctx) |
344 | 138 | { |
345 | 69 | if (stream == NULL) |
346 | 0 | arch = fz_keep_archive(ctx, dir); |
347 | 69 | else |
348 | 69 | { |
349 | 69 | arch = fz_try_open_archive_with_stream(ctx, stream); |
350 | 69 | if (arch == NULL) |
351 | 69 | break; |
352 | 69 | } |
353 | | |
354 | | /* If it's an archive, and we can find at least one plausible page |
355 | | * then we can open it as a cbz. */ |
356 | 0 | count = fz_count_archive_entries(ctx, arch); |
357 | 0 | for (i = 0; i < count && ret == 0; i++) |
358 | 0 | { |
359 | 0 | const char *name = fz_list_archive_entry(ctx, arch, i); |
360 | 0 | const char *ext; |
361 | 0 | if (name == NULL) |
362 | 0 | continue; |
363 | 0 | ext = strrchr(name, '.'); |
364 | 0 | if (ext) |
365 | 0 | { |
366 | 0 | for (k = 0; cbz_ext_list[k]; k++) |
367 | 0 | { |
368 | 0 | if (!fz_strcasecmp(ext, cbz_ext_list[k])) |
369 | 0 | { |
370 | 0 | ret = 25; |
371 | 0 | break; |
372 | 0 | } |
373 | 0 | } |
374 | 0 | } |
375 | 0 | } |
376 | 0 | } |
377 | 138 | fz_always(ctx) |
378 | 69 | fz_drop_archive(ctx, arch); |
379 | 69 | fz_catch(ctx) |
380 | 0 | fz_rethrow(ctx); |
381 | | |
382 | 69 | return ret; |
383 | 69 | } |
384 | | |
/* Handler record that exposes this file's entry points and tables. The
 * initialiser is positional, so the order must match the field order of
 * fz_document_handler (declared elsewhere); the first slot is left NULL. */
fz_document_handler cbz_document_handler =
{
	NULL,
	cbz_open_document,
	cbz_extensions,
	cbz_mimetypes,
	cbz_recognize_doc_content
};