Coverage Report

Created: 2026-03-31 07:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mupdf/source/html/html-doc.c
Line
Count
Source
1
// Copyright (C) 2004-2026 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "html-imp.h"
25
26
#include <string.h>
27
#include <math.h>
28
29
/* Indices used to address the page_margin array of an fz_html
 * (presumably top, right, bottom, left). */
enum
{
  T,
  R,
  B,
  L
};
30
31
typedef struct
32
{
33
  fz_document super;
34
  fz_archive *zip;
35
  fz_html_font_set *set;
36
  fz_html *html;
37
  fz_outline *outline;
38
  const fz_htdoc_format_t *format;
39
} html_document;
40
41
typedef struct
42
{
43
  fz_page super;
44
  html_document *doc;
45
  int number;
46
} html_page;
47
48
static void
49
htdoc_drop_document(fz_context *ctx, fz_document *doc_)
50
0
{
51
0
  html_document *doc = (html_document*)doc_;
52
0
  fz_drop_archive(ctx, doc->zip);
53
0
  fz_drop_html(ctx, doc->html);
54
0
  fz_drop_html_font_set(ctx, doc->set);
55
0
  fz_drop_outline(ctx, doc->outline);
56
0
}
57
58
static fz_link_dest
59
htdoc_resolve_link(fz_context *ctx, fz_document *doc_, const char *dest)
60
0
{
61
0
  html_document *doc = (html_document*)doc_;
62
0
  const char *s = strchr(dest, '#');
63
0
  if (s && s[1] != 0)
64
0
  {
65
0
    float y = fz_find_html_target(ctx, doc->html, s+1);
66
0
    if (y >= 0)
67
0
    {
68
0
      int page = y / doc->html->page_h;
69
0
      return fz_make_link_dest_xyz(0, page, 0, y - page * doc->html->page_h, 0);
70
0
    }
71
0
  }
72
73
0
  return fz_make_link_dest_none();
74
0
}
75
76
static int
77
htdoc_count_pages(fz_context *ctx, fz_document *doc_, int chapter)
78
0
{
79
0
  html_document *doc = (html_document*)doc_;
80
0
  if (doc->html->tree.root->s.layout.b > 0)
81
0
    return ceilf(doc->html->tree.root->s.layout.b / doc->html->page_h);
82
0
  return 1;
83
0
}
84
85
static void
86
htdoc_update_outline(fz_context *ctx, fz_document *doc, fz_outline *node)
87
0
{
88
0
  while (node)
89
0
  {
90
0
    fz_link_dest dest = htdoc_resolve_link(ctx, doc, node->uri);
91
0
    node->page = dest.loc;
92
0
    node->x = dest.x;
93
0
    node->y = dest.y;
94
0
    htdoc_update_outline(ctx, doc, node->down);
95
0
    node = node->next;
96
0
  }
97
0
}
98
99
static void
100
htdoc_layout(fz_context *ctx, fz_document *doc_, float w, float h, float em)
101
0
{
102
0
  html_document *doc = (html_document*)doc_;
103
104
0
  fz_layout_html(ctx, doc->html, w, h, em);
105
106
0
  htdoc_update_outline(ctx, doc_, doc->outline);
107
0
}
108
109
static void
110
htdoc_drop_page(fz_context *ctx, fz_page *page_)
111
0
{
112
0
}
113
114
static fz_rect
115
htdoc_bound_page(fz_context *ctx, fz_page *page_, fz_box_type box)
116
0
{
117
0
  html_page *page = (html_page*)page_;
118
0
  html_document *doc = page->doc;
119
0
  fz_rect bbox;
120
0
  bbox.x0 = 0;
121
0
  bbox.y0 = 0;
122
0
  bbox.x1 = doc->html->page_w + doc->html->page_margin[L] + doc->html->page_margin[R];
123
0
  bbox.y1 = doc->html->page_h + doc->html->page_margin[T] + doc->html->page_margin[B];
124
0
  return bbox;
125
0
}
126
127
static void
128
htdoc_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, fz_matrix ctm, fz_cookie *cookie)
129
0
{
130
0
  html_page *page = (html_page*)page_;
131
0
  html_document *doc = page->doc;
132
0
  fz_draw_html(ctx, dev, ctm, doc->html, page->number);
133
0
}
134
135
static fz_link *
136
htdoc_load_links(fz_context *ctx, fz_page *page_)
137
0
{
138
0
  html_page *page = (html_page*)page_;
139
0
  html_document *doc = page->doc;
140
0
  return fz_load_html_links(ctx, doc->html, page->number, "");
141
0
}
142
143
static fz_bookmark
144
htdoc_make_bookmark(fz_context *ctx, fz_document *doc_, fz_location loc)
145
0
{
146
0
  html_document *doc = (html_document*)doc_;
147
0
  return fz_make_html_bookmark(ctx, doc->html, loc.page);
148
0
}
149
150
static fz_location
151
htdoc_lookup_bookmark(fz_context *ctx, fz_document *doc_, fz_bookmark mark)
152
0
{
153
0
  html_document *doc = (html_document*)doc_;
154
0
  return fz_make_location(0, fz_lookup_html_bookmark(ctx, doc->html, mark));
155
0
}
156
157
static fz_page *
158
htdoc_load_page(fz_context *ctx, fz_document *doc_, int chapter, int number)
159
0
{
160
0
  html_document *doc = (html_document*)doc_;
161
0
  html_page *page = fz_new_derived_page(ctx, html_page, doc_);
162
0
  page->super.bound_page = htdoc_bound_page;
163
0
  page->super.run_page_contents = htdoc_run_page;
164
0
  page->super.load_links = htdoc_load_links;
165
0
  page->super.drop_page = htdoc_drop_page;
166
0
  page->doc = doc;
167
0
  page->number = number;
168
0
  return (fz_page*)page;
169
0
}
170
171
static fz_outline *
172
htdoc_load_outline(fz_context *ctx, fz_document *doc_)
173
0
{
174
0
  html_document *doc = (html_document*)doc_;
175
0
  return fz_keep_outline(ctx, doc->outline);
176
0
}
177
178
static int
179
htdoc_lookup_metadata(fz_context *ctx, fz_document *doc_, const char *key, char *buf, size_t size)
180
0
{
181
0
  html_document *doc = (html_document *)doc_;
182
0
  if (!strcmp(key, FZ_META_FORMAT))
183
0
    return 1 + (int)fz_strlcpy(buf, doc->format->format_name, size);
184
0
  if (!strcmp(key, FZ_META_INFO_TITLE) && doc->html->title)
185
0
    return 1 + (int)fz_strlcpy(buf, doc->html->title, size);
186
0
  return -1;
187
0
}
188
189
static fz_html *
190
generic_parse(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_buffer *buffer_in, const char *user_css, const fz_htdoc_format_t *format)
191
0
{
192
0
  fz_buffer *buffer_html = NULL;
193
0
  fz_html *html = NULL;
194
195
0
  fz_var(buffer_html);
196
197
0
  fz_try(ctx)
198
0
  {
199
0
    if (format->convert_to_html)
200
0
      buffer_html = format->convert_to_html(ctx, set, buffer_in, zip, user_css);
201
0
    else
202
0
      buffer_html = fz_keep_buffer(ctx, buffer_in);
203
0
    html = fz_parse_html(ctx, set, zip, base_uri, buffer_html, user_css, format->try_xml, format->try_html5, format->patch_mobi);
204
0
  }
205
0
  fz_always(ctx)
206
0
  {
207
0
    fz_drop_buffer(ctx, buffer_html);
208
0
  }
209
0
  fz_catch(ctx)
210
0
  {
211
0
    fz_drop_html(ctx, html);
212
0
    fz_rethrow(ctx);
213
0
  }
214
0
  return html;
215
0
}
216
217
fz_document *
218
fz_htdoc_open_document_with_buffer(fz_context *ctx, fz_archive *dir, fz_buffer *buf, const fz_htdoc_format_t *format)
219
0
{
220
0
  html_document *doc = NULL;
221
222
0
  fz_var(doc);
223
0
  fz_var(dir);
224
225
0
  fz_try(ctx)
226
0
  {
227
0
    doc = fz_new_derived_document(ctx, html_document);
228
0
    doc->super.drop_document = htdoc_drop_document;
229
0
    doc->super.layout = htdoc_layout;
230
0
    doc->super.load_outline = htdoc_load_outline;
231
0
    doc->super.resolve_link_dest = htdoc_resolve_link;
232
0
    doc->super.make_bookmark = htdoc_make_bookmark;
233
0
    doc->super.lookup_bookmark = htdoc_lookup_bookmark;
234
0
    doc->super.count_pages = htdoc_count_pages;
235
0
    doc->super.load_page = htdoc_load_page;
236
0
    doc->super.lookup_metadata = htdoc_lookup_metadata;
237
0
    doc->super.is_reflowable = 1;
238
239
0
    doc->zip = fz_keep_archive(ctx, dir);
240
0
    doc->format = format;
241
0
    doc->set = fz_new_html_font_set(ctx);
242
0
    doc->html = generic_parse(ctx, doc->set, doc->zip, ".", buf, fz_user_css(ctx), format);
243
0
    doc->outline = fz_load_html_outline(ctx, doc->html);
244
0
  }
245
0
  fz_always(ctx)
246
0
    fz_drop_buffer(ctx, buf);
247
0
  fz_catch(ctx)
248
0
  {
249
0
    fz_drop_document(ctx, &doc->super);
250
0
    fz_rethrow(ctx);
251
0
  }
252
253
0
  return (fz_document*)doc;
254
0
}
255
256
fz_document *
257
fz_htdoc_open_document_with_stream_and_dir(fz_context *ctx, fz_stream *stm, fz_archive *dir, const fz_htdoc_format_t *format)
258
0
{
259
0
  fz_buffer *buf = NULL;
260
261
0
  if (stm)
262
0
    buf = fz_read_all(ctx, stm, 0);
263
264
0
  return fz_htdoc_open_document_with_buffer(ctx, dir, buf, format);
265
0
}
266
267
/* Variant specific functions */
268
269
/* Generic HTML document handler */
270
271
/* Return 1 if c is a space, tab, line feed, form feed or carriage return,
 * and 0 otherwise. */
static int isws(int c)
{
  switch (c)
  {
  case 9:  /* tab */
  case 10: /* line feed */
  case 12: /* form feed */
  case 13: /* carriage return */
  case 32: /* space */
    return 1;
  default:
    return 0;
  }
}
275
276
/* Score how likely the stream is to hold HTML or XHTML.
 *
 * Up to 4096 bytes are read from the stream's current position, after which
 * the stream is rewound to offset 0. A small state machine then looks for
 * "<!doctype html" or "<html" (case insensitive), skipping whitespace,
 * "<?...>" sections and "<!-- ... -->" comments. Once one of those is found,
 * the rest of that tag (up to '>') is searched for the text "xhtml".
 *
 * ctx:        context used for stream access.
 * handler:    not used.
 * stream:     stream to examine; NULL yields 0.
 * dir:        not used.
 * hstate:     if non-NULL, *hstate is set to NULL.
 * free_state: if non-NULL, *free_state is set to NULL.
 * xhtml:      non-zero when scoring on behalf of the XHTML handler.
 *
 * Returns 0 when the content is not recognised. Otherwise returns 75 for
 * the handler that matches the flavour found and 25 for the other one. */
static int recognize_html_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **hstate, fz_document_recognize_state_free_fn **free_state, int xhtml)
{
  uint8_t buffer[4096];
  size_t i, n;
  /* Progress counter shared by the matching states. Every path into a state
   * that reads it goes through state_open, which resets it; it is also
   * initialised here so that this does not depend on that ordering. */
  size_t m = 0;
  enum {
    state_top,
    state_open,
    state_pling,
    state_query,
    state_maybe_doctype,
    state_maybe_doctype_ws,
    state_maybe_doctype_html,
    state_maybe_doctype_html_xhtml,
    state_maybe_comment,
    state_maybe_html,
    state_maybe_html_xhtml,
    state_comment
  };
  int state = state_top;
  int type = 0;

  if (hstate)
    *hstate = NULL;
  if (free_state)
    *free_state = NULL;

  if (stream == NULL)
    return 0;

  /* Simple state machine. Search for "<!doctype html" or "<html" in the first
   * 4K of the file, allowing for comments and whitespace and case insensitivity. */

  n = fz_read(ctx, stream, buffer, sizeof(buffer));
  fz_seek(ctx, stream, 0, SEEK_SET);
  if (n == 0)
    return 0;

  i = 0;
  if (n >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF)
  {
    /* UTF-8 encoded BOM. Just skip it. */
    i = 3;
  }
  else if (n >= 2 && buffer[0] == 0xFE && buffer[1] == 0xFF)
  {
    /* UTF-16, big endian. Round n down to a whole number of code units so
     * the two-byte reads below never run past the data. */
    type = 1;
    i = 2;
    n &= ~(size_t)1;
  }
  else if (n >= 2 && buffer[0] == 0xFF && buffer[1] == 0xFE)
  {
    /* UTF-16, little endian. Same rounding as above. */
    type = 2;
    i = 2;
    n &= ~(size_t)1;
  }

  while (i < n)
  {
    int c;

    switch (type)
    {
    default: /* type is only ever 0, 1 or 2; this keeps c always assigned */
    case 0: /* UTF-8 */
      c = buffer[i++];
      break;
    case 1: /* UTF-16 - big endian */
      c = buffer[i++] << 8;
      c |= buffer[i++];
      break;
    case 2: /* UTF-16 - little endian */
      c = buffer[i++];
      c |= buffer[i++] << 8;
      break;
    }

    switch (state)
    {
    case state_top:
      if (isws(c))
        continue; /* whitespace */
      if (c == '<')
        state = state_open;
      else
        return 0; /* Non whitespace found at the top level prior to a known tag. Fail. */
      break;
    case state_open:
      if (isws(c))
        continue; /* whitespace */
      if (c == '!')
        state = state_pling;
      else if (c == '?')
        state = state_query;
      else if (c == 'h' || c == 'H')
        state = state_maybe_html;
      else
        return 0; /* Not an acceptable opening tag. */
      m = 0;
      break;
    case state_query:
      /* Inside "<?": skip everything up to the next '>'. */
      if (c == '>')
        state = state_top;
      break;
    case state_pling:
      if (isws(c))
        continue; /* whitespace */
      else if (c == '-')
        state = state_maybe_comment;
      else if (c == 'd' || c == 'D')
        state = state_maybe_doctype;
      else
        return 0; /* Not an acceptable opening tag. */
      break;
    case state_maybe_comment:
      if (c == '-')
        state = state_comment;
      else
        return 0; /* Not an acceptable opening tag. */
      break;
    case state_comment:
      /* m counts the run of '-' seen so far; "-->" (two or more dashes
       * followed by '>') ends the comment. */
      if (c == '-')
      {
        m++;
      }
      else if (c == '>' && m >= 2)
      {
        state = state_top;
      }
      else
        m = 0;
      break;
    case state_maybe_doctype:
      if (c == "octype"[m] || c == "OCTYPE"[m])
      {
        m++;
        if (m == 6)
        {
          state = state_maybe_doctype_ws;
          m = 0;
        }
      }
      else
        return 0; /* Not an acceptable opening tag. */
      break;
    case state_maybe_doctype_ws:
      /* At least one whitespace character is required between "doctype"
       * and "html"; m counts them. */
      if (isws(c))
        m++;
      else if (m > 0 && (c == 'h' || c == 'H'))
      {
        state = state_maybe_doctype_html;
        m = 0;
      }
      else
        return 0; /* Not an acceptable opening tag. */
      break;
    case state_maybe_doctype_html:
    case state_maybe_html:
      /* Both states match the remaining "tml"; they differ only in which
       * state follows. */
      if (c == "tml"[m] || c == "TML"[m])
      {
        m++;
        if (m == 3)
        {
          state = (state == state_maybe_html) ? state_maybe_html_xhtml : state_maybe_doctype_html_xhtml;
          m = 0;
        }
      }
      else
        return 0; /* Not an acceptable opening tag. */
      break;
    case state_maybe_doctype_html_xhtml:
    case state_maybe_html_xhtml:
      if (c == '>')
      {
        /* Not xhtml - the xhtml agent can handle this at a pinch (so 25),
         * but we'd rather the html one did (75). */
        return xhtml ? 25 : 75;
      }
      if (c >= 'A'  && c <= 'Z')
        c += 'a'-'A';
      if (c == "xhtml"[m])
      {
        m++;
        if (m == 5)
        {
          /* xhtml - the xhtml agent would be better (75) than the html
           * agent (25). */
          return xhtml ? 75 : 25;
        }
      }
      else
      {
        /* Restart matching, but recheck c against the start: the
         * mismatching character may itself be the 'x' that begins
         * "xhtml" (e.g. "xxhtml"). */
        m = (c == 'x');
      }
      break;
    }
  }

  return 0;
}
507
508
/* recognize_content entry point for the HTML handler: score the stream with
 * the xhtml flag cleared. */
int htdoc_recognize_html_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **hstate, fz_document_recognize_state_free_fn **free_state)
{
  const int scoring_for_xhtml = 0;

  return recognize_html_content(ctx, handler, stream, dir, hstate, free_state, scoring_for_xhtml);
}
512
513
static const fz_htdoc_format_t fz_htdoc_html5 =
514
{
515
  "HTML5",
516
  NULL,
517
  0, 1, 0
518
};
519
520
static fz_document *
521
htdoc_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
522
0
{
523
0
  return fz_htdoc_open_document_with_stream_and_dir(ctx, file, dir, &fz_htdoc_html5);
524
0
}
525
526
static const char *htdoc_extensions[] =
527
{
528
  "htm",
529
  "html",
530
  NULL
531
};
532
533
static const char *htdoc_mimetypes[] =
534
{
535
  "text/html",
536
  NULL
537
};
538
539
fz_document_handler html_document_handler =
540
{
541
  NULL,
542
  htdoc_open_document,
543
  htdoc_extensions,
544
  htdoc_mimetypes,
545
  htdoc_recognize_html_content,
546
  1
547
};
548
549
/* XHTML document handler */
550
551
static const fz_htdoc_format_t fz_htdoc_xhtml =
552
{
553
  "XHTML",
554
  NULL,
555
  1, 1, 0
556
};
557
558
static fz_document *
559
xhtdoc_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
560
0
{
561
0
  return fz_htdoc_open_document_with_stream_and_dir(ctx, file, dir, &fz_htdoc_xhtml);
562
0
}
563
564
/* recognize_content entry point for the XHTML handler: score the stream
 * with the xhtml flag set. */
int xhtdoc_recognize_xhtml_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **hstate, fz_document_recognize_state_free_fn **free_state)
{
  const int scoring_for_xhtml = 1;

  return recognize_html_content(ctx, handler, stream, dir, hstate, free_state, scoring_for_xhtml);
}
568
569
static const char *xhtdoc_extensions[] =
570
{
571
  "xhtml",
572
  NULL
573
};
574
575
static const char *xhtdoc_mimetypes[] =
576
{
577
  "application/xhtml+xml",
578
  NULL
579
};
580
581
fz_document_handler xhtml_document_handler =
582
{
583
  NULL,
584
  xhtdoc_open_document,
585
  xhtdoc_extensions,
586
  xhtdoc_mimetypes,
587
  xhtdoc_recognize_xhtml_content,
588
  1
589
};
590
591
/* FB2 document handler */
592
593
static const fz_htdoc_format_t fz_htdoc_fb2 =
594
{
595
  "FictionBook2",
596
  NULL,
597
  1, 0, 0
598
};
599
600
static fz_document *
601
fb2doc_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
602
0
{
603
0
  return fz_htdoc_open_document_with_stream_and_dir(ctx, file, dir, &fz_htdoc_fb2);
604
0
}
605
606
static int
607
fb2doc_recognize_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state)
608
12
{
609
12
  const char *match = "<FictionBook";
610
12
  int pos = 0;
611
12
  int n = 4096;
612
12
  int c;
613
614
12
  if (state)
615
12
    *state = NULL;
616
12
  if (free_state)
617
12
    *free_state = NULL;
618
619
12
  if (stream == NULL)
620
0
    return 0;
621
622
12
  do
623
49.1k
  {
624
49.1k
    c = fz_read_byte(ctx, stream);
625
49.1k
    if (c == EOF)
626
0
      return 0;
627
49.1k
    if (c == match[pos])
628
206
    {
629
206
      pos++;
630
206
      if (pos == 12)
631
0
        return 100;
632
206
    }
633
48.9k
    else
634
48.9k
    {
635
      /* Restart matching, but recheck c against the start. */
636
48.9k
      pos = (c == match[0]);
637
48.9k
    }
638
49.1k
  }
639
49.1k
  while (--n > 0);
640
641
12
  return 0;
642
12
}
643
644
static const char *fb2doc_extensions[] =
645
{
646
  "fb2",
647
  "xml",
648
  NULL
649
};
650
651
static const char *fb2doc_mimetypes[] =
652
{
653
  "application/x-fictionbook",
654
  "application/xml",
655
  "text/xml",
656
  NULL
657
};
658
659
fz_document_handler fb2_document_handler =
660
{
661
  NULL,
662
  fb2doc_open_document,
663
  fb2doc_extensions,
664
  fb2doc_mimetypes,
665
  fb2doc_recognize_content
666
};
667
668
/* Mobi document handler */
669
670
static const fz_htdoc_format_t fz_htdoc_mobi =
671
{
672
  "MOBI",
673
  NULL,
674
  1, 1, 1
675
};
676
677
static fz_document *
678
mobi_open_document_with_buffer(fz_context *ctx, fz_buffer *mobi)
679
0
{
680
0
  fz_archive *dir = NULL;
681
0
  fz_buffer *html;
682
0
  fz_document *doc;
683
0
  fz_var(dir);
684
0
  fz_try(ctx)
685
0
  {
686
0
    dir = fz_extract_html_from_mobi(ctx, mobi);
687
0
    html = fz_read_archive_entry(ctx, dir, "index.html");
688
0
    doc = fz_htdoc_open_document_with_buffer(ctx, dir, html, &fz_htdoc_mobi);
689
0
  }
690
0
  fz_always(ctx)
691
0
  {
692
0
    fz_drop_buffer(ctx, mobi);
693
0
    fz_drop_archive(ctx, dir);
694
0
  }
695
0
  fz_catch(ctx)
696
0
  {
697
0
    fz_rethrow(ctx);
698
0
  }
699
0
  return doc;
700
0
}
701
702
static int
703
mobi_recognize_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state)
704
12
{
705
12
  char text[8];
706
707
12
  if (state)
708
12
    *state = NULL;
709
12
  if (free_state)
710
12
    *free_state = NULL;
711
712
12
  if (stream == NULL)
713
0
    return 0;
714
715
12
  fz_seek(ctx, stream, 32 + 28, SEEK_SET);
716
12
  if (fz_read(ctx, stream, (unsigned char *)text, 8) != 8)
717
0
    return 0;
718
12
  if (memcmp(text, "BOOKMOBI", 8) == 0)
719
0
    return 100;
720
12
  if (memcmp(text, "TEXtREAd", 8) == 0)
721
0
    return 100;
722
723
12
  return 0;
724
12
}
725
726
static fz_document *
727
mobi_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
728
0
{
729
0
  return mobi_open_document_with_buffer(ctx, fz_read_all(ctx, file, 0));
730
0
}
731
732
static const char *mobi_extensions[] =
733
{
734
  "mobi",
735
  "prc",
736
  "pdb",
737
  NULL
738
};
739
740
static const char *mobi_mimetypes[] =
741
{
742
  "application/x-mobipocket-ebook",
743
  NULL
744
};
745
746
fz_document_handler mobi_document_handler =
747
{
748
  NULL,
749
  mobi_open_document,
750
  mobi_extensions,
751
  mobi_mimetypes,
752
  mobi_recognize_content
753
};