/src/mupdf/source/html/md.c
Line | Count | Source |
1 | | // Copyright (C) 2023-2026 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | #include "html-imp.h" |
25 | | |
26 | | #ifdef FZ_ENABLE_MD |
27 | | |
28 | | #include "cmark-gfm.h" |
29 | | #include "cmark-gfm-core-extensions.h" |
30 | | #include "registry.h" |
31 | | |
32 | | #include <ctype.h> |
33 | | |
/*
	Options controlling the Markdown to HTML conversion.

	Defaults are all 0's, so a zero-initialised struct selects the
	default behaviour.

	FIXME: Very subject to change. Possibly might be removed entirely.
*/
typedef struct
{
	/* Placeholder member so the struct is not empty; no option is defined yet. */
	int dummy;
}
fz_md_to_html_opts;
40 | | |
41 | | static void |
42 | | add_extension(fz_context *ctx, cmark_parser *parser, const char *ext) |
43 | 0 | { |
44 | 0 | cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(ext); |
45 | 0 | if (!syntax_extension) |
46 | 0 | fz_throw(ctx, FZ_ERROR_LIBRARY, "cmark %s extension not found", ext); |
47 | 0 | cmark_parser_attach_syntax_extension(parser, syntax_extension); |
48 | 0 | } |
49 | | |
50 | | static void |
51 | | register_plugins(fz_context *ctx) |
52 | 0 | { |
53 | 0 | static int cmark_plugin_registration_once = 0; |
54 | | |
55 | | // Abuse the freetype lock here. |
56 | 0 | fz_lock(ctx, FZ_LOCK_FREETYPE); |
57 | 0 | if (cmark_plugin_registration_once) |
58 | 0 | { |
59 | 0 | fz_unlock(ctx, FZ_LOCK_FREETYPE); |
60 | 0 | } |
61 | 0 | else |
62 | 0 | { |
63 | 0 | fz_try(ctx) |
64 | 0 | { |
65 | 0 | cmark_gfm_core_extensions_ensure_registered(); |
66 | 0 | cmark_plugin_registration_once = 1; |
67 | 0 | atexit(cmark_release_plugins); |
68 | 0 | } |
69 | 0 | fz_always(ctx) |
70 | 0 | fz_unlock(ctx, FZ_LOCK_FREETYPE); |
71 | 0 | fz_catch(ctx) |
72 | 0 | fz_rethrow(ctx); |
73 | 0 | } |
74 | 0 | } |
75 | | |
76 | | static fz_buffer * |
77 | | fz_md_to_html(fz_context *ctx, fz_html_font_set *set, fz_buffer *buffer_in, fz_archive *dir, fz_md_to_html_opts *opts) |
78 | 0 | { |
79 | 0 | fz_buffer *buffer_out = NULL; |
80 | 0 | size_t i, len; |
81 | 0 | char *src, *out = NULL; |
82 | 0 | cmark_parser *parser = NULL; |
83 | 0 | cmark_node *document = NULL; |
84 | | /* CMark provides a way to redirect allocation, but |
85 | | * stupidly, provides no way to pass in any opaque |
86 | | * data, so we can't pass an fz_context. So might |
87 | | * as well live with the defaults for now. */ |
88 | 0 | cmark_mem *mem = cmark_get_default_mem_allocator(); |
89 | |
|
90 | 0 | fz_var(buffer_out); |
91 | 0 | fz_var(out); |
92 | 0 | fz_var(parser); |
93 | 0 | fz_var(document); |
94 | |
|
95 | 0 | fz_terminate_buffer(ctx, buffer_in); |
96 | 0 | len = buffer_in->len-1; |
97 | 0 | src = (char *)buffer_in->data; |
98 | 0 | for (i = 0; i < len; i++) |
99 | 0 | if (src[i] == 0) |
100 | 0 | src[i] = '\n'; |
101 | |
|
102 | 0 | fz_try(ctx) |
103 | 0 | { |
104 | 0 | int options = CMARK_OPT_UNSAFE | CMARK_OPT_LIBERAL_HTML_TAG | CMARK_OPT_FOOTNOTES; |
105 | |
|
106 | 0 | register_plugins(ctx); |
107 | |
|
108 | 0 | parser = cmark_parser_new_with_mem(options, mem); |
109 | 0 | add_extension(ctx, parser, "table"); |
110 | 0 | add_extension(ctx, parser, "strikethrough"); |
111 | 0 | add_extension(ctx, parser, "autolink"); |
112 | 0 | add_extension(ctx, parser, "tagfilter"); |
113 | 0 | add_extension(ctx, parser, "tasklist"); |
114 | 0 | add_extension(ctx, parser, "autoheaderid"); |
115 | |
|
116 | 0 | cmark_parser_feed(parser, src, len); |
117 | |
|
118 | 0 | document = cmark_parser_finish(parser); |
119 | |
|
120 | 0 | out = cmark_render_html_with_mem(document, options, cmark_parser_get_syntax_extensions(parser), mem); |
121 | |
|
122 | 0 | buffer_out = fz_new_buffer_from_copied_data(ctx, (unsigned char *)out, strlen(out)+1); |
123 | 0 | } |
124 | 0 | fz_always(ctx) |
125 | 0 | { |
126 | 0 | if (parser) |
127 | 0 | cmark_parser_free(parser); |
128 | 0 | if (document) |
129 | 0 | cmark_node_free(document); |
130 | 0 | mem->free(out); |
131 | 0 | } |
132 | 0 | fz_catch(ctx) |
133 | 0 | { |
134 | 0 | fz_rethrow(ctx); |
135 | 0 | } |
136 | | |
137 | 0 | #ifndef NDEBUG |
138 | 0 | if (fz_atoi(getenv("FZ_DEBUG_MARKDOWN"))) |
139 | 0 | fz_write_buffer(ctx, fz_stdout(ctx), buffer_out); |
140 | 0 | #endif |
141 | |
|
142 | 0 | return buffer_out; |
143 | 0 | } |
144 | | |
145 | | /* MD document handler */ |
146 | | |
147 | | static fz_buffer * |
148 | | md_to_html(fz_context *ctx, fz_html_font_set *set, fz_buffer *buf, fz_archive *zip) |
149 | 0 | { |
150 | 0 | fz_md_to_html_opts opts = { 0 }; |
151 | |
|
152 | 0 | return fz_md_to_html(ctx, set, buf, zip, &opts); |
153 | 0 | } |
154 | | |
/*
	Format descriptor handed to the generic HTML document opener
	for Markdown input.

	NOTE(review): the initialiser is positional; the per-field notes
	below are inferred from the values, so confirm them against the
	declaration of fz_htdoc_format_t.
*/
static const fz_htdoc_format_t fz_htdoc_md =
{
	/* Human readable name of the format. */
	"Markdown document",
	/* Callback that converts the raw input buffer to HTML. */
	md_to_html,
	/* Two integer flags; presumably they select how the generated
	 * HTML is parsed -- TODO confirm. */
	0, 1,
	/* Marks the generated HTML as originating from Markdown. */
	FZ_HTML_FLAVOR_MARKDOWN
};
162 | | |
163 | | static fz_document * |
164 | | md_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *zip, void *state) |
165 | 0 | { |
166 | 0 | return fz_htdoc_open_document_with_stream_and_dir(ctx, file, zip, &fz_htdoc_md); |
167 | 0 | } |
168 | | |
/* NULL terminated list of file extensions claimed by the Markdown handler. */
static const char *md_extensions[] = { "md", NULL };
174 | | |
/* NULL terminated list of MIME types claimed by the Markdown handler. */
static const char *md_mimetypes[] = { "text/markdown", NULL };
180 | | |
181 | | /* We are only ever 75% sure here, to allow a 'better' handler, such as sodochandler |
182 | | * to override us by returning 100. */ |
183 | | static int |
184 | | md_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *zip, void **state, fz_document_recognize_state_free_fn **free_state) |
185 | 68 | { |
186 | 68 | int ret = 0; |
187 | | |
188 | 68 | if (state) |
189 | 68 | *state = NULL; |
190 | 68 | if (free_state) |
191 | 68 | *free_state = NULL; |
192 | | |
193 | 68 | if (stream == NULL) |
194 | 0 | return 0; |
195 | | |
196 | 68 | fz_var(ret); |
197 | | |
198 | 136 | fz_try(ctx) |
199 | 136 | { |
200 | | // Really crap markdown detector. |
201 | | // Assume the first line of the file will be a heading, |
202 | | // so will be <whitespace>#+<whitespace>. |
203 | 68 | int c = fz_read_byte(ctx, stream); |
204 | | |
205 | 68 | if (c == EOF) |
206 | 0 | break; |
207 | | |
208 | 72 | while (c != EOF && isspace(c)) |
209 | 4 | c = fz_read_byte(ctx, stream); |
210 | | |
211 | 68 | if (c != '#') |
212 | 68 | break; |
213 | | |
214 | 0 | while (c != EOF && c == '#') |
215 | 0 | c = fz_read_byte(ctx, stream); |
216 | |
|
217 | 0 | if (c == EOF || !isspace(c)) |
218 | 0 | break; |
219 | | |
220 | 0 | ret = 50; |
221 | 0 | } |
222 | 136 | fz_always(ctx) |
223 | 68 | { |
224 | 68 | } |
225 | 68 | fz_catch(ctx) |
226 | 0 | fz_rethrow(ctx); |
227 | | |
228 | 68 | return ret; |
229 | 68 | } |
230 | | |
/*
	Document handler for Markdown files, tying together the callbacks
	and tables defined above.

	NOTE(review): the initialiser is positional; the per-slot notes
	below are inferred from the values assigned, so confirm them
	against the declaration of fz_document_handler.
*/
fz_document_handler md_document_handler =
{
	/* No callback supplied for the first slot. */
	NULL,
	/* Opens a Markdown document from a stream. */
	md_open_document,
	/* File extensions handled. */
	md_extensions,
	/* MIME types handled. */
	md_mimetypes,
	/* Content based recognition. */
	md_recognize_doc_content
};
239 | | |
240 | | #endif // FZ_ENABLE_MD |