Coverage Report

Created: 2026-06-08 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mupdf/source/html/md.c
Line
Count
Source
1
// Copyright (C) 2023-2026 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "html-imp.h"
25
26
#ifdef FZ_ENABLE_MD
27
28
#include "cmark-gfm.h"
29
#include "cmark-gfm-core-extensions.h"
30
#include "registry.h"
31
32
#include <ctype.h>
33
34
/* Options for the Markdown-to-HTML conversion. There are no real options yet:
 * the struct only holds a placeholder member, and the all-zero value is the
 * default. FIXME: Very subject to change. Possibly might be removed entirely. */
35
typedef struct
36
{
37
  int dummy; /* placeholder so the struct is not empty */
38
}
39
fz_md_to_html_opts;
40
41
static void
42
add_extension(fz_context *ctx, cmark_parser *parser, const char *ext)
43
0
{
44
0
  cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(ext);
45
0
  if (!syntax_extension)
46
0
    fz_throw(ctx, FZ_ERROR_LIBRARY, "cmark %s extension not found", ext);
47
0
  cmark_parser_attach_syntax_extension(parser, syntax_extension);
48
0
}
49
50
static void
51
register_plugins(fz_context *ctx)
52
0
{
53
0
  static int cmark_plugin_registration_once = 0;
54
55
  // Abuse the freetype lock here.
56
0
  fz_lock(ctx, FZ_LOCK_FREETYPE);
57
0
  if (cmark_plugin_registration_once)
58
0
  {
59
0
    fz_unlock(ctx, FZ_LOCK_FREETYPE);
60
0
  }
61
0
  else
62
0
  {
63
0
    fz_try(ctx)
64
0
    {
65
0
      cmark_gfm_core_extensions_ensure_registered();
66
0
      cmark_plugin_registration_once = 1;
67
0
      atexit(cmark_release_plugins);
68
0
    }
69
0
    fz_always(ctx)
70
0
      fz_unlock(ctx, FZ_LOCK_FREETYPE);
71
0
    fz_catch(ctx)
72
0
      fz_rethrow(ctx);
73
0
  }
74
0
}
75
76
static fz_buffer *
77
fz_md_to_html(fz_context *ctx, fz_html_font_set *set, fz_buffer *buffer_in, fz_archive *dir, fz_md_to_html_opts *opts)
78
0
{
79
0
  fz_buffer *buffer_out = NULL;
80
0
  size_t i, len;
81
0
  char *src, *out = NULL;
82
0
  cmark_parser *parser = NULL;
83
0
  cmark_node *document = NULL;
84
  /* CMark provides a way to redirect allocation, but
85
   * stupidly, provides no way to pass in any opaque
86
   * data, so we can't pass an fz_context. So might
87
   * as well live with the defaults for now. */
88
0
  cmark_mem *mem = cmark_get_default_mem_allocator();
89
90
0
  fz_var(buffer_out);
91
0
  fz_var(out);
92
0
  fz_var(parser);
93
0
  fz_var(document);
94
95
0
  fz_terminate_buffer(ctx, buffer_in);
96
0
  len = buffer_in->len-1;
97
0
  src = (char *)buffer_in->data;
98
0
  for (i = 0; i < len; i++)
99
0
    if (src[i] == 0)
100
0
      src[i] = '\n';
101
102
0
  fz_try(ctx)
103
0
  {
104
0
    int options = CMARK_OPT_UNSAFE | CMARK_OPT_LIBERAL_HTML_TAG | CMARK_OPT_FOOTNOTES;
105
106
0
    register_plugins(ctx);
107
108
0
    parser = cmark_parser_new_with_mem(options, mem);
109
0
    add_extension(ctx, parser, "table");
110
0
    add_extension(ctx, parser, "strikethrough");
111
0
    add_extension(ctx, parser, "autolink");
112
0
    add_extension(ctx, parser, "tagfilter");
113
0
    add_extension(ctx, parser, "tasklist");
114
0
    add_extension(ctx, parser, "autoheaderid");
115
116
0
    cmark_parser_feed(parser, src, len);
117
118
0
    document = cmark_parser_finish(parser);
119
120
0
    out = cmark_render_html_with_mem(document, options, cmark_parser_get_syntax_extensions(parser), mem);
121
122
0
    buffer_out = fz_new_buffer_from_copied_data(ctx, (unsigned char *)out, strlen(out)+1);
123
0
  }
124
0
  fz_always(ctx)
125
0
  {
126
0
    if (parser)
127
0
      cmark_parser_free(parser);
128
0
    if (document)
129
0
      cmark_node_free(document);
130
0
    mem->free(out);
131
0
  }
132
0
  fz_catch(ctx)
133
0
  {
134
0
    fz_rethrow(ctx);
135
0
  }
136
137
0
#ifndef NDEBUG
138
0
  if (fz_atoi(getenv("FZ_DEBUG_MARKDOWN")))
139
0
    fz_write_buffer(ctx, fz_stdout(ctx), buffer_out);
140
0
#endif
141
142
0
  return buffer_out;
143
0
}
144
145
/* MD document handler */
146
147
static fz_buffer *
148
md_to_html(fz_context *ctx, fz_html_font_set *set, fz_buffer *buf, fz_archive *zip)
149
0
{
150
0
  fz_md_to_html_opts opts = { 0 };
151
152
0
  return fz_md_to_html(ctx, set, buf, zip, &opts);
153
0
}
154
155
/* Format descriptor that md_open_document hands to
 * fz_htdoc_open_document_with_stream_and_dir. The initializer is positional;
 * the field names belong to the fz_htdoc_format_t declaration, which is not
 * part of this file. */
static const fz_htdoc_format_t fz_htdoc_md =
156
{
157
  "Markdown document", /* presumably the human-readable format name -- confirm */
158
  md_to_html, /* callback that turns the Markdown source buffer into HTML */
159
  0, 1, /* NOTE(review): meaning of these two values is set by fz_htdoc_format_t -- confirm */
160
  FZ_HTML_FLAVOR_MARKDOWN
161
};
162
163
static fz_document *
164
md_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *zip, void *state)
165
0
{
166
0
  return fz_htdoc_open_document_with_stream_and_dir(ctx, file, zip, &fz_htdoc_md);
167
0
}
168
169
/* NULL-terminated list of file extensions listed in md_document_handler. */
static const char *md_extensions[] =
170
{
171
  "md",
172
  NULL
173
};
174
175
/* NULL-terminated list of MIME types listed in md_document_handler. */
static const char *md_mimetypes[] =
176
{
177
  "text/markdown",
178
  NULL
179
};
180
181
/* We are only ever 75% sure here, to allow a 'better' handler, such as sodochandler
182
 * to override us by returning 100. */
183
static int
184
md_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *zip, void **state, fz_document_recognize_state_free_fn **free_state)
185
68
{
186
68
  int ret = 0;
187
188
68
  if (state)
189
68
    *state = NULL;
190
68
  if (free_state)
191
68
    *free_state = NULL;
192
193
68
  if (stream == NULL)
194
0
    return 0;
195
196
68
  fz_var(ret);
197
198
136
  fz_try(ctx)
199
136
  {
200
    // Really crap markdown detector.
201
    // Assume the first line of the file will be a heading,
202
    // so will be <whitespace>#+<whitespace>.
203
68
    int c = fz_read_byte(ctx, stream);
204
205
68
    if (c == EOF)
206
0
      break;
207
208
72
    while (c != EOF && isspace(c))
209
4
      c = fz_read_byte(ctx, stream);
210
211
68
    if (c != '#')
212
68
      break;
213
214
0
    while (c != EOF && c == '#')
215
0
      c = fz_read_byte(ctx, stream);
216
217
0
    if (c == EOF || !isspace(c))
218
0
      break;
219
220
0
    ret = 50;
221
0
  }
222
136
  fz_always(ctx)
223
68
  {
224
68
  }
225
68
  fz_catch(ctx)
226
0
    fz_rethrow(ctx);
227
228
68
  return ret;
229
68
}
230
231
/* Document handler for Markdown files; the only symbol in this file with
 * external linkage. The initializer is positional; the field names belong to
 * the fz_document_handler declaration, which is not part of this file. */
fz_document_handler md_document_handler =
232
{
233
  NULL, /* NOTE(review): first handler slot is left unset -- confirm which callback this is */
234
  md_open_document, /* opens a Markdown stream through the HTML document opener */
235
  md_extensions, /* NULL-terminated list: "md" */
236
  md_mimetypes, /* NULL-terminated list: "text/markdown" */
237
  md_recognize_doc_content /* content sniffer; returns 50 or 0 */
238
};
239
240
#endif // FZ_ENABLE_MD