/src/mupdf/source/html/html-parse.c
Line | Count | Source |
1 | | // Copyright (C) 2004-2025 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | #include "mupdf/ucdn.h" |
25 | | #include "html-imp.h" |
26 | | |
27 | | #include <string.h> |
28 | | #include <stdio.h> |
29 | | #include <assert.h> |
30 | | |
/* Small integer constants 0..3. NOTE(review): presumably top/right/bottom/left
 * side indices for box edges; no use is visible in this part of the file. */
enum { T, R, B, L };
32 | | |
/*
	Built-in CSS source text for HTML element defaults, stored as one
	concatenated string literal (one rule per source line).
	NOTE(review): where and in what order this is fed to the CSS parser is
	not visible in this part of the file.
*/
static const char *html_default_css =
"@page{margin:3em 2em}"
"a:link{color:blue;text-decoration:underline}"
"address{display:block;font-style:italic}"
"b{font-weight:bold}"
"bdo{direction:rtl;unicode-bidi:bidi-override}"
"blockquote{display:block;margin:1em 40px}"
"body{display:block;margin:1em}"
"cite{font-style:italic}"
"code{font-family:monospace}"
"dd{display:block;margin:0 0 0 40px}"
"del{text-decoration:line-through}"
"div{display:block}"
"dl{display:block;margin:1em 0}"
"dt{display:block}"
"em{font-style:italic}"
"h1{display:block;font-size:2em;font-weight:bold;margin:0.67em 0;page-break-after:avoid}"
"h2{display:block;font-size:1.5em;font-weight:bold;margin:0.83em 0;page-break-after:avoid}"
"h3{display:block;font-size:1.17em;font-weight:bold;margin:1em 0;page-break-after:avoid}"
"h4{display:block;font-size:1em;font-weight:bold;margin:1.33em 0;page-break-after:avoid}"
"h5{display:block;font-size:0.83em;font-weight:bold;margin:1.67em 0;page-break-after:avoid}"
"h6{display:block;font-size:0.67em;font-weight:bold;margin:2.33em 0;page-break-after:avoid}"
"head{display:none}"
"hr{border-style:solid;border-width:1px;display:block;margin-bottom:0.5em;margin-top:0.5em;text-align:center}"
"html{display:block}"
"i{font-style:italic}"
"ins{text-decoration:underline}"
"kbd{font-family:monospace}"
"li{display:list-item}"
"menu{display:block;list-style-type:disc;margin:1em 0;padding:0 0 0 30pt}"
"ol{display:block;list-style-type:decimal;margin:1em 0;padding:0 0 0 30pt}"
"p{display:block;margin:1em 0}"
"pre{display:block;font-family:monospace;margin:1em 0;white-space:pre}"
"samp{font-family:monospace}"
"script{display:none}"
"small{font-size:0.83em}"
"strong{font-weight:bold}"
"style{display:none}"
"sub{font-size:0.83em;vertical-align:sub}"
"sup{font-size:0.83em;vertical-align:super}"
"table{display:table;border-spacing:2px}"
"tbody{display:table-row-group}"
"td{display:table-cell;padding:1px;background-color:inherit}"
"tfoot{display:table-footer-group}"
"th{display:table-cell;font-weight:bold;padding:1px;text-align:center;background-color:inherit}"
"thead{display:table-header-group}"
"tr{display:table-row}"
"ul{display:block;list-style-type:disc;margin:1em 0;padding:0 0 0 30pt}"
"ul ul{list-style-type:circle}"
"ul ul ul{list-style-type:square}"
"var{font-style:italic}"
"colgroup{display:table-column-group}"
"col{display:table-column}"
"figcaption,caption{display:block;text-align:center}"
"address,article,aside,figure,footer,header,hgroup,main,nav,section,search{display:block}"
;
89 | | |
/*
	Built-in CSS source text with MOBI-specific rules: a 'pagebreak'
	element, zeroed list/paragraph margins, and a few legacy elements
	(center, big, strike).
	NOTE(review): presumably applied on top of the HTML defaults; the
	point of use is not visible in this part of the file.
*/
static const char *mobi_default_css =
"pagebreak{display:block;page-break-before:always}"
"dl,ol,ul{margin:0}"
"p{margin:0}"
"blockquote{margin:0 40px}"
"center{display:block;text-align:center}"
"big{font-size:1.17em}"
"strike{text-decoration:line-through}"
;
99 | | |
/*
	Built-in CSS source text for FictionBook (FB2) element defaults,
	stored as one concatenated string literal (one rule per source line).
	Everything under 'description' is hidden except title-info>coverpage.
	NOTE(review): the point of use is not visible in this part of the file.
*/
static const char *fb2_default_css =
"@page{margin:3em 2em}"
"FictionBook{display:block;margin:1em}"
"stylesheet,binary{display:none}"
"description>*{display:none}"
"description>title-info{display:block}"
"description>title-info>*{display:none}"
"description>title-info>coverpage{display:block;page-break-before:always;page-break-after:always}"
"body,section,title,subtitle,p,cite,epigraph,text-author,date,poem,stanza,v,empty-line{display:block}"
"image{display:block}"
"p>image{display:inline}"
"table{display:table}"
"tr{display:table-row}"
"th,td{display:table-cell}"
"a{color:blue;text-decoration:underline}"
"a[type=note]{font-size:small;vertical-align:super}"
"code{white-space:pre;font-family:monospace}"
"emphasis{font-style:italic}"
"strikethrough{text-decoration:line-through}"
"strong{font-weight:bold}"
"sub{font-size:small;vertical-align:sub}"
"sup{font-size:small;vertical-align:super}"
"image{margin:1em 0;text-align:center}"
"cite,poem{margin:1em 2em}"
"subtitle,epigraph,stanza{margin:1em 0}"
"title>p{text-align:center;font-size:x-large}"
"subtitle{text-align:center;font-size:large}"
"p{margin-top:1em;text-align:justify}"
"empty-line{padding-top:1em}"
"p+p{margin-top:0;text-indent:1.5em}"
"empty-line+p{margin-top:0}"
"section>title{page-break-before:always}"
;
133 | | |
static const char *known_html_tags[] = {
	// TODO: add known FB2 tags?
	// Sorted list of all HTML tags.
	// Must stay in strcmp() order: find_known_html_tag() binary searches it.
	"a", "abbr", "acronym", "address", "annotation-xml", "applet", "area",
	"article", "aside", "audio", "b", "base", "basefont", "bdi", "bdo",
	"bgsound", "big", "blink", "blockquote", "body", "br", "button",
	"canvas", "caption", "center", "cite", "code", "col", "colgroup",
	"data", "datalist", "dd", "del", "desc", "details", "dfn", "dir",
	"div", "dl", "dt", "em", "embed", "fieldset", "figcaption", "figure",
	"font", "footer", "foreignobject", "form", "frame", "frameset", "h1",
	"h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
	"i", "iframe", "image", "img", "input", "ins", "isindex", "kbd",
	"keygen", "label", "legend", "li", "link", "listing", "main",
	"malignmark", "map", "mark", "marquee", "math", "menu", "menuitem",
	"meta", "meter", "mglyph", "mi", "mn", "mo", "ms", "mtext", "multicol",
	"nav", "nextid", "nobr", "noembed", "noframes", "noscript", "object",
	"ol", "optgroup", "option", "output", "p", "param", "plaintext", "pre",
	"progress", "q", "rb", "rp", "rt", "rtc", "ruby", "s", "samp",
	"script", "section", "select", "small", "source", "spacer", "span",
	"strike", "strong", "style", "sub", "summary", "sup", "svg", "table",
	"tbody", "td", "template", "textarea", "tfoot", "th", "thead", "time",
	"title", "tr", "track", "tt", "u", "ul", "var", "video", "wbr", "xmp",
};

/* Sorted (strcmp() order) list of FB2 tags; find_known_fb2_tag() binary searches it. */
static const char *known_fb2_tags[] = {
	"FictionBook", "a", "binary", "body", "cite", "code", "coverpage",
	"date", "description", "emphasis", "empty-line", "epigraph", "image",
	"p", "poem", "section", "stanza", "strikethrough", "strong",
	"stylesheet", "sub", "subtitle", "sup", "table", "td", "text-author",
	"th", "title", "title-info", "tr", "v",
};

/*
	Binary search a strcmp()-sorted table of 'count' tag names for 'tag'.

	Returns the table's own pointer for the entry that matches exactly
	(case-sensitive), or NULL if there is no such entry.
*/
static const char *find_known_tag(const char *const *table, size_t count, const char *tag)
{
	/* Half-open search interval [lo, hi). */
	size_t lo = 0;
	size_t hi = count;
	while (lo < hi)
	{
		size_t mid = lo + (hi - lo) / 2;
		int c = strcmp(tag, table[mid]);
		if (c < 0)
			hi = mid;
		else if (c > 0)
			lo = mid + 1;
		else
			return table[mid];
	}
	return NULL;
}

/*
	Look up 'tag' in known_html_tags.

	Returns the statically allocated name from the table, or NULL if the
	tag is not a known HTML tag.

	The whole table is searched; the upper bound used to be half the
	element count, which made every tag sorting after "link" unfindable.
*/
static const char *find_known_html_tag(const char *tag)
{
	return find_known_tag(known_html_tags, sizeof known_html_tags / sizeof known_html_tags[0], tag);
}

/*
	Look up 'tag' in known_fb2_tags.

	Returns the statically allocated name from the table, or NULL if the
	tag is not a known FB2 tag. The whole table is searched.
*/
static const char *find_known_fb2_tag(const char *tag)
{
	return find_known_tag(known_fb2_tags, sizeof known_fb2_tags / sizeof known_fb2_tags[0], tag);
}
201 | | |
/*
	Per-table column style storage.
	'styles' is a heap array released by drop_table_styles().
	NOTE(review): presumably 'maxcols' is the allocated capacity and
	'ncols' the number of entries in use; confirm against the code that
	fills this in (not visible here).
*/
typedef struct
{
	int maxcols;
	int ncols;
	col_style *styles;
}
table_styles;
209 | | |
210 | | static void |
211 | | drop_table_styles(fz_context *ctx, table_styles *ts) |
212 | 0 | { |
213 | 0 | fz_free(ctx, ts->styles); |
214 | 0 | ts->styles = NULL; |
215 | 0 | } |
216 | | |
/*
	Mutable state threaded through box/flow generation.
	Only fields whose use is visible in this part of the file are
	described; the rest are left uncommented.
*/
struct genstate
{
	fz_pool *pool; /* pool that boxes, flow nodes and copied strings are allocated from */
	fz_html_font_set *set;
	fz_archive *zip;
	fz_tree *images;
	fz_xml_doc *xml;
	int is_fb2; /* non-zero: also consult the FB2 tag table and use l:href/xlink:href on <a> */
	const char *base_uri;
	fz_css *css;
	int at_bol; /* set after a forced line break, cleared after a word or image is emitted */
	fz_html_box *emit_white; /* box owning a pending collapsed space; emitted by flush_space() */
	int last_brk_cls; /* line break class of the previous character; row index into pairbrk */

	int list_counter;
	int section_depth;
	fz_bidi_direction markup_dir; /* copied into every new box */
	fz_text_language markup_lang;
	char *href; /* current link target; set by <a> boxes and copied into every new box */

	table_styles tab_styles;
	int col_num;

	fz_css_style_splay *styles; /* passed to fz_css_enlist() when a box is given its style */
};
242 | | |
/* Return 1 if c is a space, tab, carriage return or newline; 0 otherwise. */
static int iswhite(int c)
{
	switch (c)
	{
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return 1;
	default:
		return 0;
	}
}
247 | | |
/*
	Return 1 if every character of the NUL-terminated string s satisfies
	iswhite() (so also for the empty string), 0 otherwise.
*/
static int is_all_white(const char *s)
{
	const char *p;

	for (p = s; *p != 0; ++p)
	{
		if (!iswhite(*p))
			return 0;
	}
	return 1;
}
258 | | |
259 | | /* TODO: pool allocator for flow nodes */ |
260 | | /* TODO: store text by pointing to a giant buffer */ |
261 | | |
262 | | static void fz_drop_html_flow(fz_context *ctx, fz_html_flow *flow) |
263 | 0 | { |
264 | 0 | while (flow) |
265 | 0 | { |
266 | 0 | fz_html_flow *next = flow->next; |
267 | 0 | if (flow->type == FLOW_IMAGE) |
268 | 0 | fz_drop_image(ctx, flow->content.image); |
269 | 0 | flow = next; |
270 | 0 | } |
271 | 0 | } |
272 | | |
273 | | static fz_html_flow *add_flow(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, int type, int extras) |
274 | 0 | { |
275 | 0 | size_t size = (type == FLOW_IMAGE ? sizeof(fz_html_flow) : offsetof(fz_html_flow, content) + extras); |
276 | 0 | fz_html_flow *flow; |
277 | | |
278 | | /* Shouldn't happen, but bug 705324. */ |
279 | 0 | if (top == NULL || top->type != BOX_FLOW) |
280 | 0 | return NULL; |
281 | | |
282 | 0 | flow = fz_pool_alloc(ctx, pool, size); |
283 | 0 | flow->type = type; |
284 | 0 | flow->expand = 0; |
285 | 0 | flow->bidi_level = 0; |
286 | 0 | flow->markup_lang = 0; |
287 | 0 | flow->breaks_line = 0; |
288 | 0 | flow->box = inline_box; |
289 | 0 | (*top->s.build.flow_tail) = flow; |
290 | 0 | top->s.build.flow_tail = &flow->next; |
291 | 0 | return flow; |
292 | 0 | } |
293 | | |
294 | | static void add_flow_space(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box) |
295 | 0 | { |
296 | 0 | fz_html_flow *flow = add_flow(ctx, pool, top, inline_box, FLOW_SPACE, 0); |
297 | 0 | if (flow) |
298 | 0 | flow->expand = 1; |
299 | 0 | } |
300 | | |
301 | | static void add_flow_break(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box) |
302 | 0 | { |
303 | 0 | (void)add_flow(ctx, pool, top, inline_box, FLOW_BREAK, 0); |
304 | 0 | } |
305 | | |
306 | | static void add_flow_sbreak(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box) |
307 | 0 | { |
308 | 0 | (void)add_flow(ctx, pool, top, inline_box, FLOW_SBREAK, 0); |
309 | 0 | } |
310 | | |
311 | | static void add_flow_shyphen(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box) |
312 | 0 | { |
313 | 0 | (void)add_flow(ctx, pool, top, inline_box, FLOW_SHYPHEN, 0); |
314 | 0 | } |
315 | | |
316 | | static void add_flow_word(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, const char *a, const char *b, int lang) |
317 | 0 | { |
318 | 0 | fz_html_flow *flow = add_flow(ctx, pool, top, inline_box, FLOW_WORD, b - a + 1); |
319 | 0 | if (flow == NULL) |
320 | 0 | return; |
321 | 0 | memcpy(flow->content.text, a, b - a); |
322 | 0 | flow->content.text[b - a] = 0; |
323 | 0 | flow->markup_lang = lang; |
324 | 0 | } |
325 | | |
326 | | static void add_flow_image(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, fz_image *img) |
327 | 0 | { |
328 | 0 | fz_html_flow *flow = add_flow(ctx, pool, top, inline_box, FLOW_IMAGE, 0); |
329 | 0 | if (flow) |
330 | 0 | flow->content.image = fz_keep_image(ctx, img); |
331 | 0 | } |
332 | | |
333 | | static void add_flow_anchor(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box) |
334 | 0 | { |
335 | 0 | (void)add_flow(ctx, pool, top, inline_box, FLOW_ANCHOR, 0); |
336 | 0 | } |
337 | | |
338 | | fz_html_flow *fz_html_split_flow(fz_context *ctx, fz_pool *pool, fz_html_flow *flow, size_t offset) |
339 | 0 | { |
340 | 0 | fz_html_flow *new_flow; |
341 | 0 | char *text; |
342 | 0 | size_t len; |
343 | |
|
344 | 0 | assert(flow->type == FLOW_WORD); |
345 | |
|
346 | 0 | if (offset == 0) |
347 | 0 | return flow; |
348 | 0 | text = flow->content.text; |
349 | 0 | while (*text && offset) |
350 | 0 | { |
351 | 0 | int rune; |
352 | 0 | text += fz_chartorune(&rune, text); |
353 | 0 | offset--; |
354 | 0 | } |
355 | 0 | len = strlen(text); |
356 | 0 | new_flow = fz_pool_alloc(ctx, pool, offsetof(fz_html_flow, content) + len+1); |
357 | 0 | memcpy(new_flow, flow, offsetof(fz_html_flow, content)); |
358 | 0 | new_flow->next = flow->next; |
359 | 0 | flow->next = new_flow; |
360 | 0 | strcpy(new_flow->content.text, text); |
361 | 0 | *text = 0; |
362 | 0 | return new_flow; |
363 | 0 | } |
364 | | |
365 | | static void flush_space(fz_context *ctx, fz_html_box *flow, int lang, struct genstate *g) |
366 | 0 | { |
367 | 0 | static const char *space = " "; |
368 | 0 | fz_pool *pool = g->pool; |
369 | 0 | if (g->emit_white) |
370 | 0 | { |
371 | 0 | int bsp = g->emit_white->style->white_space & WS_ALLOW_BREAK_SPACE; |
372 | 0 | if (!g->at_bol) |
373 | 0 | { |
374 | 0 | if (bsp) |
375 | 0 | add_flow_space(ctx, pool, flow, g->emit_white); |
376 | 0 | else |
377 | 0 | add_flow_word(ctx, pool, flow, g->emit_white, space, space+1, lang); |
378 | 0 | } |
379 | 0 | g->emit_white = 0; |
380 | 0 | } |
381 | 0 | } |
382 | | |
/* pair-wise lookup table for UAX#14 linebreaks
	The linebreak table entries mean:
	^ prohibited break
		never break before A and after B, even with one or more spaces in between
	% indirect break
		do not break before A, unless one or more spaces follow B
	_ direct break
		break allowed before A

	Indexed as pairbrk[class of previous character][class of current
	character]; each row is labelled with its class on the right, and the
	header comment spells the column classes vertically. Only class
	values up to UCDN_LINEBREAK_CLASS_ZWJ are used as column indices by
	the lookup in generate_text_run().
*/
static const char *pairbrk[32] =
{
/* -OCCQGNESIPPNAHIIHBBBZCWHHJJJREEZ- */
/* -PLPULSXYSROULLDNYAB2WMJ23LVTIBMW- */
/* - J- */
"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^", /* OP open punctuation */
"_^^%%^^^^%%____%%%__^^^________%", /* CL close punctuation */
"_^^%%^^^^%%%%%_%%%__^^^________%", /* CP close parenthesis */
"^^^%%%^^^%%%%%%%%%%%^^^%%%%%%%%%", /* QU quotation */
"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%%%%", /* GL non-breaking glue */
"_^^%%%^^^______%%%__^^^________%", /* NS nonstarters */
"_^^%%%^^^______%%%__^^^________%", /* EX exclamation/interrogation */
"_^^%%%^^^__%_%_%%%__^^^________%", /* SY symbols allowing break after */
"_^^%%%^^^__%%%_%%%__^^^________%", /* IS infix numeric separator */
"%^^%%%^^^__%%%%%%%__^^^%%%%%_%%%", /* PR prefix numeric */
"%^^%%%^^^__%%%_%%%__^^^________%", /* PO postfix numeric */
"%^^%%%^^^%%%%%_%%%__^^^________%", /* NU numeric */
"%^^%%%^^^%%%%%_%%%__^^^________%", /* AL ordinary alphabetic and symbol characters */
"%^^%%%^^^%%%%%_%%%__^^^________%", /* HL hebrew letter */
"_^^%%%^^^_%____%%%__^^^________%", /* ID ideographic */
"_^^%%%^^^______%%%__^^^________%", /* IN inseparable characters */
"_^^%_%^^^__%___%%%__^^^________%", /* HY hyphens */
"_^^%_%^^^______%%%__^^^________%", /* BA break after */
"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%%%%", /* BB break before */
"_^^%%%^^^______%%%_^^^^________%", /* B2 break opportunity before and after */
"____________________^___________", /* ZW zero width space */
"%^^%%%^^^%_%%%_%%%__^^^________%", /* CM combining mark */
"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%%%%", /* WJ word joiner */
"_^^%%%^^^_%____%%%__^^^___%%___%", /* H2 hangul leading/vowel syllable */
"_^^%%%^^^_%____%%%__^^^____%___%", /* H3 hangul leading/vowel/trailing syllable */
"_^^%%%^^^_%____%%%__^^^%%%%____%", /* JL hangul leading jamo */
"_^^%%%^^^_%____%%%__^^^___%%___%", /* JV hangul vowel jamo */
"_^^%%%^^^_%____%%%__^^^____%___%", /* JT hangul trailing jamo */
"_^^%%%^^^______%%%__^^^_____%__%", /* RI regional indicator */
"_^^%%%^^^_%____%%%__^^^_______%%", /* EB emoji base */
"_^^%%%^^^_%____%%%__^^^________%", /* EM emoji modifier */
"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%%%%", /* ZWJ zero width joiner */
};
430 | | |
431 | | static fz_html_box * |
432 | | find_flow_encloser(fz_context *ctx, fz_html_box *flow) |
433 | 0 | { |
434 | | /* This code was written to assume that there will always be a |
435 | | * flow box enclosing callers of this. Bug 705324 shows that |
436 | | * this isn't always the case. In the absence of a reproducer |
437 | | * file, all I can do is try to patch around the issue so that |
438 | | * we won't crash. */ |
439 | 0 | while (flow->type != BOX_FLOW) |
440 | 0 | { |
441 | 0 | if (flow->up == NULL) |
442 | 0 | { |
443 | 0 | fz_warn(ctx, "Flow encloser not found. Please report this file!"); |
444 | 0 | break; |
445 | 0 | } |
446 | 0 | flow = flow->up; |
447 | 0 | } |
448 | 0 | return flow; |
449 | 0 | } |
450 | | |
451 | | static void |
452 | | generate_text_run(fz_context *ctx, fz_html_box *box, fz_html_box *flow, const char *mark, const char *end, int lang, struct genstate *g) |
453 | 0 | { |
454 | 0 | fz_pool *pool = g->pool; |
455 | 0 | int bsp = box->style->white_space & WS_ALLOW_BREAK_SPACE; |
456 | 0 | const char *text = mark; |
457 | 0 | const char *prev; |
458 | 0 | int c; |
459 | |
|
460 | 0 | while (text < end) |
461 | 0 | { |
462 | 0 | prev = text; |
463 | 0 | text += fz_chartorune(&c, text); |
464 | 0 | if (c == 0xAD) /* soft hyphen */ |
465 | 0 | { |
466 | 0 | if (mark != prev) |
467 | 0 | add_flow_word(ctx, pool, flow, box, mark, prev, lang); |
468 | 0 | if (box->style->hyphens != HYP_NONE) |
469 | 0 | add_flow_shyphen(ctx, pool, flow, box); |
470 | 0 | mark = text; |
471 | 0 | g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; /* don't add sbreaks after a soft hyphen */ |
472 | 0 | } |
473 | 0 | else if (bsp) /* allow soft breaks */ |
474 | 0 | { |
475 | 0 | int this_brk_cls = ucdn_get_resolved_linebreak_class(c); |
476 | 0 | if (this_brk_cls <= UCDN_LINEBREAK_CLASS_ZWJ) |
477 | 0 | { |
478 | 0 | int brk = pairbrk[g->last_brk_cls][this_brk_cls]; |
479 | | |
480 | | /* we handle spaces elsewhere, so ignore these classes */ |
481 | 0 | if (brk == '@') brk = '^'; |
482 | 0 | if (brk == '#') brk = '^'; |
483 | 0 | if (brk == '%') brk = '^'; |
484 | |
|
485 | 0 | if (brk == '_') |
486 | 0 | { |
487 | 0 | if (mark != prev) |
488 | 0 | add_flow_word(ctx, pool, flow, box, mark, prev, lang); |
489 | 0 | add_flow_sbreak(ctx, pool, flow, box); |
490 | 0 | mark = prev; |
491 | 0 | } |
492 | |
|
493 | 0 | g->last_brk_cls = this_brk_cls; |
494 | 0 | } |
495 | 0 | } |
496 | 0 | } |
497 | 0 | if (mark != text) |
498 | 0 | add_flow_word(ctx, pool, flow, box, mark, text, lang); |
499 | 0 | } |
500 | | |
501 | | static void |
502 | | generate_text_run_with_hyphens(fz_context *ctx, fz_html_box *box, fz_html_box *flow, const char *mark, const char *end, int lang, fz_hyphenator *hyph, struct genstate *g) |
503 | 0 | { |
504 | 0 | char word[256]; |
505 | 0 | int size = end - mark; |
506 | 0 | if (size < 64) |
507 | 0 | { |
508 | 0 | fz_hyphenate_word(ctx, hyph, mark, size, word, sizeof word); |
509 | 0 | generate_text_run(ctx, box, flow, word, word + strlen(word), lang, g); |
510 | 0 | } |
511 | 0 | else |
512 | 0 | { |
513 | 0 | generate_text_run(ctx, box, flow, mark, end, lang, g); |
514 | 0 | } |
515 | 0 | } |
516 | | |
517 | | static int fz_isletter_or_apos(int c) |
518 | 0 | { |
519 | 0 | int cat; |
520 | 0 | if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '\'' || c == 0x2019) |
521 | 0 | return 1; |
522 | 0 | cat = ucdn_get_general_category(c); |
523 | 0 | return cat >= UCDN_GENERAL_CATEGORY_LL && cat <= UCDN_GENERAL_CATEGORY_LU; |
524 | 0 | } |
525 | | |
/*
	Convert a NUL-terminated text string into flow nodes appended to the
	flow box that encloses 'box'.

	The box's white-space style decides how whitespace is treated:
	  WS_COLLAPSE             runs of whitespace become one pending space
	  WS_ALLOW_BREAK_SPACE    spaces become breakable FLOW_SPACE nodes
	  WS_FORCE_BREAK_NEWLINE  CR, LF and CRLF become forced line breaks

	Non-whitespace runs are emitted through generate_text_run(). When
	the box uses automatic hyphenation and a hyphenator exists for
	'lang', each run is first split into letter and non-letter parts so
	that only the letter parts are hyphenated.

	Updates g->at_bol, g->emit_white and g->last_brk_cls.
*/
static void generate_text(fz_context *ctx, fz_html_box *box, const char *text, int lang, struct genstate *g)
{
	fz_html_box *flow;
	fz_pool *pool = g->pool;
	int collapse = box->style->white_space & WS_COLLAPSE;
	int bsp = box->style->white_space & WS_ALLOW_BREAK_SPACE;
	int bnl = box->style->white_space & WS_FORCE_BREAK_NEWLINE;
	fz_hyphenator *hyph = NULL;
	int c, n;

	static const char *space = " ";

	/* Defensive: find_flow_encloser() as written never returns NULL for
	 * a non-NULL box, but it may return a non-flow box, in which case
	 * the add_flow_*() calls below append nothing. */
	flow = find_flow_encloser(ctx, box);
	if (flow == NULL)
		return;

	/* A missing hyphenation table is only warned about; text is then
	 * emitted unhyphenated. */
	if (box->style->hyphens == HYP_AUTO && lang != FZ_LANG_UNSET)
	{
		hyph = fz_lookup_hyphenator(ctx, lang);
		if (!hyph)
		{
			char tmp[8];
			fz_warn(ctx, "no hyphenation table for lang='%s'", fz_string_from_text_language(tmp, lang));
		}
	}

	while (*text)
	{
		/* Case 1: forced line break; CRLF counts as a single break. */
		if (bnl && (*text == '\n' || *text == '\r'))
		{
			if (text[0] == '\r' && text[1] == '\n')
				text += 2;
			else
				text += 1;
			add_flow_break(ctx, pool, flow, box);
			g->at_bol = 1;
		}
		/* Case 2: other whitespace. */
		else if (iswhite(*text))
		{
			if (collapse)
			{
				/* Swallow the run and remember that a space is owed;
				 * flush_space() emits it before the next word. With
				 * forced newlines, stop at CR/LF so case 1 sees them. */
				if (bnl)
					while (*text == ' ' || *text == '\t')
						++text;
				else
					while (iswhite(*text))
						++text;
				g->emit_white = box;
			}
			else
			{
				/* Preserved whitespace: one node per character. */
				// TODO: tabs
				if (bsp)
					add_flow_space(ctx, pool, flow, box);
				else
					add_flow_word(ctx, pool, flow, box, space, space+1, lang);
				++text;
			}
			g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; /* don't add sbreaks after a space */
		}
		/* Case 3: a run of non-whitespace characters. */
		else
		{
			const char *mark = text;

			flush_space(ctx, flow, lang, g);

			/* No soft break before the first character of a line. */
			if (g->at_bol)
				g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ;

			while (*text && !iswhite(*text))
				++text;

			if (hyph)
			{
				// split word into letter and non-letter runs for hyphenator
				const char *p = mark;
				/* Invariant at the top of the outer loop: c is the rune
				 * at p and n its byte length. The inner loops re-establish
				 * this when they stop on the first rune of the next part. */
				n = fz_chartorune(&c, p);
				while (p < text)
				{
					p += n;
					if (fz_isletter_or_apos(c))
					{
						/* Extend over following letters/apostrophes. */
						while (p < text)
						{
							n = fz_chartorune(&c, p);
							if (!fz_isletter_or_apos(c))
								break;
							p += n;
						}
						generate_text_run_with_hyphens(ctx, box, flow, mark, p, lang, hyph, g);
					}
					else
					{
						/* Extend over following non-letters. */
						while (p < text)
						{
							n = fz_chartorune(&c, p);
							if (fz_isletter_or_apos(c))
								break;
							p += n;
						}
						generate_text_run(ctx, box, flow, mark, p, lang, g);
					}
					mark = p;
				}
			}
			else
			{
				generate_text_run(ctx, box, flow, mark, text, lang, g);
			}

			g->at_bol = 0;
		}
	}
}
640 | | |
/*
	Load the image named by an HTML 'src' attribute.

	'src' is either a base64 data: URI (image/jpeg, image/png or
	image/gif) or a path that is resolved against 'base_uri',
	URL-decoded, cleaned and read from the archive 'zip'.

	Returns the image, or NULL if anything fails; errors are swallowed
	and reported only as a warning.
	NOTE(review): the returned reference appears to be owned by the
	caller (generate_image() drops it) - confirm.
*/
static fz_image *load_html_image(fz_context *ctx, fz_archive *zip, const char *base_uri, const char *src)
{
	char path[2048];
	fz_image *img = NULL;
	fz_buffer *buf = NULL;

	fz_var(img);
	fz_var(buf);

	fz_try(ctx)
	{
		/* The numeric lengths below are the lengths of the prefixes
		 * being compared. */
		if (!strncmp(src, "data:image/jpeg;base64,", 23))
			buf = fz_new_buffer_from_base64(ctx, src+23, 0);
		else if (!strncmp(src, "data:image/png;base64,", 22))
			buf = fz_new_buffer_from_base64(ctx, src+22, 0);
		else if (!strncmp(src, "data:image/gif;base64,", 22))
			buf = fz_new_buffer_from_base64(ctx, src+22, 0);
		else
		{
			/* Build "<base_uri>/<src>" in the fixed-size buffer.
			 * NOTE(review): over-long paths are presumably truncated by
			 * the fz_strl* helpers rather than rejected - confirm. */
			fz_strlcpy(path, base_uri, sizeof path);
			fz_strlcat(path, "/", sizeof path);
			fz_strlcat(path, src, sizeof path);
			fz_urldecode(path);
			fz_cleanname(path);
			buf = fz_read_archive_entry(ctx, zip, path);
		}
#if FZ_ENABLE_SVG
		/* SVG is detected by ".svg" appearing anywhere in src. */
		if (strstr(src, ".svg"))
			img = fz_new_image_from_svg(ctx, buf, base_uri, zip);
		else
#endif
			img = fz_new_image_from_buffer(ctx, buf);
	}
	fz_always(ctx)
		fz_drop_buffer(ctx, buf);
	fz_catch(ctx)
	{
		fz_ignore_error(ctx);
		fz_warn(ctx, "html: cannot load image src='%s'", src);
	}

	return img;
}
684 | | |
/*
	Build an image from an SVG element embedded in the document.

	Returns the image, or NULL if loading fails (reported only as a
	warning) or if the build has FZ_ENABLE_SVG disabled, in which case
	the function does nothing.
*/
static fz_image *load_svg_image(fz_context *ctx, fz_archive *zip, const char *base_uri,
	fz_xml_doc *xmldoc, fz_xml *node)
{
	fz_image *img = NULL;
#if FZ_ENABLE_SVG
	fz_try(ctx)
		img = fz_new_image_from_svg_xml(ctx, xmldoc, node, base_uri, zip);
	fz_catch(ctx)
	{
		fz_ignore_error(ctx);
		fz_warn(ctx, "html: cannot load embedded svg document");
	}
#endif
	return img;
}
700 | | |
701 | | static void generate_image(fz_context *ctx, fz_html_box *box, fz_image *img, struct genstate *g) |
702 | 0 | { |
703 | 0 | fz_html_box *flow; |
704 | 0 | fz_pool *pool = g->pool; |
705 | |
|
706 | 0 | flow = find_flow_encloser(ctx, box); |
707 | |
|
708 | 0 | flush_space(ctx, flow, 0, g); |
709 | |
|
710 | 0 | if (!img) |
711 | 0 | { |
712 | 0 | const char *alt = "[image]"; |
713 | 0 | add_flow_word(ctx, pool, flow, box, alt, alt + 7, 0); |
714 | 0 | } |
715 | 0 | else |
716 | 0 | { |
717 | 0 | fz_try(ctx) |
718 | 0 | { |
719 | 0 | add_flow_sbreak(ctx, pool, flow, box); |
720 | 0 | add_flow_image(ctx, pool, flow, box, img); |
721 | 0 | add_flow_sbreak(ctx, pool, flow, box); |
722 | 0 | } |
723 | 0 | fz_always(ctx) |
724 | 0 | { |
725 | 0 | fz_drop_image(ctx, img); |
726 | 0 | } |
727 | 0 | fz_catch(ctx) |
728 | 0 | fz_rethrow(ctx); |
729 | 0 | } |
730 | | |
731 | 0 | g->at_bol = 0; |
732 | 0 | } |
733 | | |
734 | | static void fz_drop_html_box(fz_context *ctx, fz_html_box *box) |
735 | 0 | { |
736 | 0 | while (box) |
737 | 0 | { |
738 | 0 | fz_html_box *next = box->next; |
739 | 0 | if (box->type == BOX_FLOW) |
740 | 0 | fz_drop_html_flow(ctx, box->u.flow.head); |
741 | 0 | fz_drop_html_box(ctx, box->down); |
742 | 0 | box = next; |
743 | 0 | } |
744 | 0 | } |
745 | | |
746 | | static void fz_drop_html_imp(fz_context *ctx, fz_storable *stor) |
747 | 0 | { |
748 | 0 | fz_html *html = (fz_html *)stor; |
749 | 0 | fz_drop_html_box(ctx, html->tree.root); |
750 | 0 | fz_drop_pool(ctx, html->tree.pool); |
751 | 0 | } |
752 | | |
753 | | static void fz_drop_story_imp(fz_context *ctx, fz_storable *stor) |
754 | 0 | { |
755 | 0 | fz_story *story = (fz_story *)stor; |
756 | 0 | fz_free(ctx, story->user_css); |
757 | 0 | fz_drop_html_font_set(ctx, story->font_set); |
758 | 0 | fz_drop_xml(ctx, story->dom); |
759 | 0 | fz_drop_html_box(ctx, story->tree.root); |
760 | 0 | fz_drop_buffer(ctx, story->warnings); |
761 | 0 | fz_drop_archive(ctx, story->zip); |
762 | | /* The pool must be the last thing dropped. */ |
763 | 0 | fz_drop_pool(ctx, story->tree.pool); |
764 | 0 | } |
765 | | |
766 | | /* Drop a structure derived from an html_tree. The exact things |
767 | | * freed here will depend upon the drop function with which it |
768 | | * was created. */ |
769 | | static void |
770 | | fz_drop_html_tree(fz_context *ctx, fz_html_tree *tree) |
771 | 0 | { |
772 | 0 | fz_defer_reap_start(ctx); |
773 | 0 | fz_drop_storable(ctx, &tree->storable); |
774 | 0 | fz_defer_reap_end(ctx); |
775 | 0 | } |
776 | | |
777 | | void fz_drop_html(fz_context *ctx, fz_html *html) |
778 | 0 | { |
779 | 0 | fz_drop_html_tree(ctx, &html->tree); |
780 | 0 | } |
781 | | |
782 | | void fz_drop_story(fz_context *ctx, fz_story *story) |
783 | 0 | { |
784 | 0 | if (!story) |
785 | 0 | return; |
786 | | |
787 | 0 | fz_drop_html_tree(ctx, &story->tree); |
788 | 0 | } |
789 | | |
790 | | fz_html *fz_keep_html(fz_context *ctx, fz_html *html) |
791 | 0 | { |
792 | 0 | return fz_keep_storable(ctx, &html->tree.storable); |
793 | 0 | } |
794 | | |
795 | | static fz_html_box *new_box(fz_context *ctx, struct genstate *g, fz_xml *node, int type, fz_css_style *style) |
796 | 0 | { |
797 | 0 | fz_html_box *box; |
798 | 0 | const char *tag = fz_xml_tag(node); |
799 | 0 | const char *id = fz_xml_att(node, "id"); |
800 | 0 | const char *href; |
801 | |
|
802 | 0 | if (type == BOX_INLINE) |
803 | 0 | box = fz_pool_alloc(ctx, g->pool, offsetof(fz_html_box, u)); |
804 | 0 | else if (type == BOX_FLOW) |
805 | 0 | box = fz_pool_alloc(ctx, g->pool, offsetof(fz_html_box, u) + sizeof(box->u.flow)); |
806 | 0 | else |
807 | 0 | box = fz_pool_alloc(ctx, g->pool, offsetof(fz_html_box, u) + sizeof(box->u.block)); |
808 | |
|
809 | 0 | box->type = type; |
810 | 0 | box->is_first_flow = 0; |
811 | 0 | box->markup_dir = g->markup_dir; |
812 | 0 | box->heading = 0; |
813 | 0 | box->list_item = 0; |
814 | |
|
815 | | #ifdef DEBUG_HTML_SEQ |
816 | | { |
817 | | static int seq = 0; |
818 | | box->seq = seq++; |
819 | | } |
820 | | #endif |
821 | |
|
822 | 0 | box->style = fz_css_enlist(ctx, style, &g->styles, g->pool); |
823 | |
|
824 | 0 | if (tag) |
825 | 0 | { |
826 | 0 | box->tag = find_known_html_tag(tag); |
827 | 0 | if (!box->tag && g->is_fb2) |
828 | 0 | box->tag = find_known_fb2_tag(tag); |
829 | 0 | if (!box->tag) |
830 | 0 | box->tag = fz_pool_strdup(ctx, g->pool, tag); |
831 | 0 | } |
832 | 0 | else |
833 | 0 | { |
834 | 0 | box->tag = "#anon"; |
835 | 0 | } |
836 | |
|
837 | 0 | if (id) |
838 | 0 | box->id = fz_pool_strdup(ctx, g->pool, id); |
839 | |
|
840 | 0 | if (tag && tag[0]=='a' && tag[1]==0) |
841 | 0 | { |
842 | | // Support deprecated anchor syntax with id in "name" instead of "id" attribute. |
843 | 0 | if (!id) |
844 | 0 | { |
845 | 0 | const char *name = fz_xml_att(node, "name"); |
846 | 0 | if (name) |
847 | 0 | box->id = fz_pool_strdup(ctx, g->pool, name); |
848 | 0 | } |
849 | |
|
850 | 0 | if (g->is_fb2) |
851 | 0 | { |
852 | 0 | href = fz_xml_att(node, "l:href"); |
853 | 0 | if (!href) |
854 | 0 | href = fz_xml_att(node, "xlink:href"); |
855 | 0 | } |
856 | 0 | else |
857 | 0 | { |
858 | 0 | href = fz_xml_att(node, "href"); |
859 | 0 | } |
860 | 0 | if (href) |
861 | 0 | g->href = fz_pool_strdup(ctx, g->pool, href); |
862 | 0 | } |
863 | |
|
864 | 0 | if (g->href) |
865 | 0 | box->href = g->href; |
866 | |
|
867 | 0 | if (type == BOX_FLOW) |
868 | 0 | { |
869 | 0 | box->u.flow.head = NULL; |
870 | 0 | box->s.build.flow_tail = &box->u.flow.head; |
871 | 0 | } |
872 | |
|
873 | 0 | return box; |
874 | 0 | } |
875 | | |
876 | | static void append_box(fz_context *ctx, fz_html_box *parent, fz_html_box *child) |
877 | 0 | { |
878 | 0 | child->up = parent; |
879 | 0 | if (!parent->down) |
880 | 0 | parent->down = child; |
881 | 0 | if (parent->s.build.last_child) |
882 | 0 | parent->s.build.last_child->next = child; |
883 | 0 | parent->s.build.last_child = child; |
884 | 0 | } |
885 | | |
886 | | static fz_html_box *find_block_context(fz_context *ctx, fz_html_box *box) |
887 | 0 | { |
888 | 0 | while (box->type != BOX_BLOCK && box->type != BOX_TABLE_CELL) |
889 | 0 | box = box->up; |
890 | 0 | return box; |
891 | 0 | } |
892 | | |
893 | | static fz_html_box *find_table_row_context(fz_context *ctx, fz_html_box *box) |
894 | 0 | { |
895 | 0 | fz_html_box *look = box; |
896 | 0 | while (look && look->type != BOX_TABLE) |
897 | 0 | look = look->up; |
898 | 0 | if (look) |
899 | 0 | return look; |
900 | 0 | fz_warn(ctx, "table-row not inside table element"); |
901 | 0 | return NULL; |
902 | 0 | } |
903 | | |
904 | | static fz_html_box *find_table_cell_context(fz_context *ctx, fz_html_box *box) |
905 | 0 | { |
906 | 0 | fz_html_box *look = box; |
907 | 0 | while (look && look->type != BOX_TABLE_ROW) |
908 | 0 | look = look->up; |
909 | 0 | if (look) |
910 | 0 | return look; |
911 | 0 | fz_warn(ctx, "table-cell not inside table-row element"); |
912 | 0 | return NULL; |
913 | 0 | } |
914 | | |
915 | | static fz_html_box *find_inline_context(fz_context *ctx, struct genstate *g, fz_html_box *box) |
916 | 0 | { |
917 | 0 | fz_css_style style; |
918 | 0 | fz_html_box *flow_box; |
919 | |
|
920 | 0 | if (box->type == BOX_FLOW || box->type == BOX_INLINE) |
921 | 0 | return box; |
922 | | |
923 | | // We have an inline element that is not in an existing flow/inline context. |
924 | | |
925 | | // Find the closest block level box to insert content into. |
926 | 0 | while (box->type != BOX_BLOCK && box->type != BOX_TABLE_CELL) |
927 | 0 | box = box->up; |
928 | | |
929 | | // Concatenate onto the last open flow box if we have one. |
930 | 0 | if (box->s.build.last_child && box->s.build.last_child->type == BOX_FLOW) |
931 | 0 | return box->s.build.last_child; |
932 | | |
933 | | // No flow box found, create and insert one! |
934 | | |
935 | | // TODO: null style instead of default for flow box? |
936 | 0 | fz_default_css_style(ctx, &style); |
937 | 0 | flow_box = new_box(ctx, g, NULL, BOX_FLOW, &style); |
938 | 0 | flow_box->is_first_flow = !box->down; |
939 | 0 | g->at_bol = 1; |
940 | |
|
941 | 0 | append_box(ctx, box, flow_box); |
942 | |
|
943 | 0 | return flow_box; |
944 | 0 | } |
945 | | |
946 | | static void gen2_children(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *root_node, fz_css_match *root_match); |
947 | | |
948 | | static void |
949 | | apply_attributes_as_styles(fz_context *ctx, fz_css_style *style, fz_xml *node) |
950 | 0 | { |
951 | 0 | const char *att; |
952 | 0 | const char *tag = fz_xml_tag(node); |
953 | |
|
954 | 0 | if (tag == NULL) |
955 | 0 | return; /* No tag -> no attributes. */ |
956 | | |
957 | 0 | if (!strcmp(tag, "canvas") || |
958 | 0 | !strcmp(tag, "embed") || |
959 | 0 | !strcmp(tag, "iframe") || |
960 | 0 | !strcmp(tag, "img") || |
961 | 0 | !strcmp(tag, "input") || |
962 | 0 | !strcmp(tag, "object") || |
963 | 0 | !strcmp(tag, "video")) |
964 | 0 | { |
965 | 0 | att = fz_xml_att(node, "width"); |
966 | 0 | if (att) |
967 | 0 | { |
968 | 0 | style->width.value = fz_atof(att); |
969 | 0 | if (strchr(att,'%')) |
970 | 0 | style->width.unit = N_PERCENT; |
971 | 0 | else |
972 | 0 | style->width.unit = N_LENGTH; |
973 | 0 | } |
974 | |
|
975 | 0 | att = fz_xml_att(node, "height"); |
976 | 0 | if (att) |
977 | 0 | { |
978 | 0 | style->height.value = fz_atof(att); |
979 | 0 | if (strchr(att,'%')) |
980 | 0 | style->height.unit = N_PERCENT; |
981 | 0 | else |
982 | 0 | style->height.unit = N_LENGTH; |
983 | 0 | } |
984 | 0 | } |
985 | |
|
986 | 0 | att = fz_xml_att(node, "valign"); |
987 | 0 | if (!att) |
988 | 0 | {} |
989 | 0 | else if (!strcmp(att, "top")) |
990 | 0 | style->vertical_align = VA_TOP; |
991 | 0 | else if (!strcmp(att, "middle")) |
992 | 0 | style->vertical_align = VA_MIDDLE; |
993 | 0 | else if (!strcmp(att, "bottom")) |
994 | 0 | style->vertical_align = VA_BOTTOM; |
995 | 0 | else if (!strcmp(att, "baseline")) |
996 | 0 | style->vertical_align = VA_BASELINE; |
997 | |
|
998 | 0 | if (!strcmp(tag, "td") || |
999 | 0 | !strcmp(tag, "th")) |
1000 | 0 | { |
1001 | 0 | att = fz_xml_att(node, "rowspan"); |
1002 | 0 | if (att) |
1003 | 0 | { |
1004 | 0 | int i = fz_atoi(att); |
1005 | 0 | style->rowspan = fz_clampi(i, 1, 1000); |
1006 | 0 | } |
1007 | |
|
1008 | 0 | att = fz_xml_att(node, "colspan"); |
1009 | 0 | if (att) |
1010 | 0 | { |
1011 | 0 | int i = fz_atoi(att); |
1012 | 0 | style->colspan = fz_clampi(i, 1, 1000); |
1013 | 0 | } |
1014 | 0 | } |
1015 | | |
1016 | | /* FIXME: We probably need to vary this based on node type; |
1017 | | * for images, it'd need to be "float:left" etc. */ |
1018 | 0 | att = fz_xml_att(node, "align"); |
1019 | 0 | if (!att) |
1020 | 0 | {} |
1021 | 0 | else if (!strcmp(att, "left")) |
1022 | 0 | style->text_align = TA_LEFT; |
1023 | 0 | else if (!strcmp(att, "right")) |
1024 | 0 | style->text_align = TA_RIGHT; |
1025 | 0 | else if (!strcmp(att, "center")) |
1026 | 0 | style->text_align = TA_CENTER; |
1027 | 0 | else if (!strcmp(att, "justify")) |
1028 | 0 | style->text_align = TA_JUSTIFY; |
1029 | |
|
1030 | 0 | att = fz_xml_att(node, "bgcolor"); |
1031 | 0 | if (att) |
1032 | 0 | style->background_color = fz_css_color_from_string(att); |
1033 | |
|
1034 | 0 | att = fz_xml_att(node, "border"); |
1035 | 0 | if (att) |
1036 | 0 | { |
1037 | 0 | style->border_width[3].unit = style->border_width[2].unit = style->border_width[1].unit = style->border_width[0].unit = N_LENGTH; |
1038 | 0 | style->border_width[3].value = style->border_width[2].value = style->border_width[1].value = style->border_width[0].value = fz_atof(att); |
1039 | 0 | } |
1040 | |
|
1041 | 0 | att = fz_xml_att(node, "hidden"); |
1042 | 0 | if(att) |
1043 | 0 | style->visibility = V_HIDDEN; |
1044 | 0 | } |
1045 | | |
1046 | | static void gen2_text(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node) |
1047 | 0 | { |
1048 | 0 | fz_html_box *anon_box; |
1049 | 0 | fz_css_style style; |
1050 | 0 | const char *text; |
1051 | 0 | int collapse; |
1052 | |
|
1053 | 0 | text = fz_xml_text(node); |
1054 | 0 | collapse = root_box->style->white_space & WS_COLLAPSE; |
1055 | 0 | if (collapse && is_all_white(text)) |
1056 | 0 | { |
1057 | 0 | g->emit_white = root_box; |
1058 | 0 | } |
1059 | 0 | else |
1060 | 0 | { |
1061 | 0 | if (root_box->type != BOX_INLINE) |
1062 | 0 | { |
1063 | | /* Create anonymous inline box, with the same style as the top block box. */ |
1064 | 0 | style = *root_box->style; |
1065 | | |
1066 | | // Make sure not to recursively multiply font sizes |
1067 | 0 | style.font_size.value = 1; |
1068 | 0 | style.font_size.unit = N_SCALE; |
1069 | |
|
1070 | 0 | root_box = find_inline_context(ctx, g, root_box); |
1071 | 0 | anon_box = new_box(ctx, g, NULL, BOX_INLINE, &style); |
1072 | 0 | append_box(ctx, root_box, anon_box); |
1073 | 0 | root_box = anon_box; |
1074 | 0 | } |
1075 | |
|
1076 | 0 | generate_text(ctx, root_box, text, g->markup_lang, g); |
1077 | 0 | } |
1078 | 0 | } |
1079 | | |
1080 | | static fz_html_box *gen2_inline(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_css_style *style) |
1081 | 0 | { |
1082 | 0 | fz_html_box *this_box; |
1083 | 0 | fz_html_box *flow_box; |
1084 | 0 | root_box = find_inline_context(ctx, g, root_box); |
1085 | 0 | this_box = new_box(ctx, g, node, BOX_INLINE, style); |
1086 | 0 | append_box(ctx, root_box, this_box); |
1087 | 0 | if (this_box->id) |
1088 | 0 | { |
1089 | 0 | flow_box = find_flow_encloser(ctx, this_box); |
1090 | 0 | add_flow_anchor(ctx, g->pool, flow_box, this_box); |
1091 | 0 | } |
1092 | 0 | return this_box; |
1093 | 0 | } |
1094 | | |
1095 | | static void gen2_break(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node) |
1096 | 0 | { |
1097 | 0 | fz_html_box *this_box; |
1098 | 0 | fz_html_box *flow_box; |
1099 | |
|
1100 | 0 | if (root_box->type != BOX_INLINE) |
1101 | 0 | { |
1102 | | /* Create inline box to hold the <br> tag, with the same style as containing block. */ |
1103 | | /* Make sure not to recursively multiply font sizes. */ |
1104 | 0 | fz_css_style style = *root_box->style; |
1105 | 0 | style.font_size.value = 1; |
1106 | 0 | style.font_size.unit = N_SCALE; |
1107 | 0 | this_box = new_box(ctx, g, node, BOX_INLINE, &style); |
1108 | 0 | append_box(ctx, find_inline_context(ctx, g, root_box), this_box); |
1109 | 0 | } |
1110 | 0 | else |
1111 | 0 | { |
1112 | 0 | this_box = root_box; |
1113 | 0 | } |
1114 | |
|
1115 | 0 | flow_box = find_flow_encloser(ctx, this_box); |
1116 | 0 | add_flow_break(ctx, g->pool, flow_box, this_box); |
1117 | 0 | g->at_bol = 1; |
1118 | 0 | } |
1119 | | |
1120 | | static fz_html_box *gen2_block(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_css_style *style) |
1121 | 0 | { |
1122 | 0 | fz_html_box *this_box; |
1123 | 0 | root_box = find_block_context(ctx, root_box); |
1124 | 0 | this_box = new_box(ctx, g, node, BOX_BLOCK, style); |
1125 | 0 | append_box(ctx, root_box, this_box); |
1126 | 0 | return this_box; |
1127 | 0 | } |
1128 | | |
1129 | | static void |
1130 | | push_colstyle(fz_context *ctx, table_styles *ts, col_style cs) |
1131 | 0 | { |
1132 | 0 | if (ts->ncols == ts->maxcols) |
1133 | 0 | { |
1134 | 0 | int newmax = ts->maxcols * 2; |
1135 | 0 | if (newmax == 0) |
1136 | 0 | newmax = 8; |
1137 | 0 | ts->styles = fz_realloc(ctx, ts->styles, sizeof(ts->styles[0]) * newmax); |
1138 | 0 | ts->maxcols = newmax; |
1139 | 0 | } |
1140 | |
|
1141 | 0 | ts->styles[ts->ncols++] = cs; |
1142 | 0 | } |
1143 | | |
1144 | | static void gen2_col(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_css_match *match) |
1145 | 0 | { |
1146 | 0 | const char *span = fz_xml_att(node, "span"); |
1147 | 0 | col_style cs = { 0 }; |
1148 | 0 | int i; |
1149 | 0 | int n = span ? fz_atoi(span) : 1; |
1150 | 0 | if (n < 1) |
1151 | 0 | n = 1; |
1152 | | |
1153 | | /* Get the col styles. */ |
1154 | 0 | fz_css_colstyle(&cs, match); |
1155 | | |
1156 | | /* FIXME: width attr ? */ |
1157 | |
|
1158 | 0 | for (i = 0; i < n; i++) |
1159 | 0 | push_colstyle(ctx, &g->tab_styles, cs); |
1160 | 0 | } |
1161 | | |
1162 | | /* All this does is give us a warning if we fail to be in a table. */ |
1163 | | static void gen2_colgroup(fz_context *ctx, fz_html_box *root_box) |
1164 | 0 | { |
1165 | 0 | fz_html_box *look = root_box; |
1166 | 0 | while (look && look->type != BOX_TABLE) |
1167 | 0 | look = look->up; |
1168 | 0 | if (!look) |
1169 | 0 | fz_warn(ctx, "colgroup not inside table element"); |
1170 | 0 | } |
1171 | | |
1172 | | static fz_html_box *gen2_table(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_css_style *style) |
1173 | 0 | { |
1174 | 0 | fz_html_box *this_box; |
1175 | 0 | root_box = find_block_context(ctx, root_box); |
1176 | 0 | this_box = new_box(ctx, g, node, BOX_TABLE, style); |
1177 | 0 | append_box(ctx, root_box, this_box); |
1178 | 0 | return this_box; |
1179 | 0 | } |
1180 | | |
1181 | | static fz_html_box *gen2_table_row(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_css_style *style) |
1182 | 0 | { |
1183 | 0 | fz_html_box *this_box, *table_box; |
1184 | |
|
1185 | 0 | table_box = find_table_row_context(ctx, root_box); |
1186 | 0 | if (!table_box) |
1187 | 0 | return gen2_block(ctx, g, root_box, node, style); |
1188 | | |
1189 | 0 | this_box = new_box(ctx, g, node, BOX_TABLE_ROW, style); |
1190 | 0 | append_box(ctx, table_box, this_box); |
1191 | 0 | g->col_num = 0; |
1192 | 0 | return this_box; |
1193 | 0 | } |
1194 | | |
1195 | | static fz_html_box *gen2_table_cell(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_css_style *style, fz_css_match *root_match) |
1196 | 0 | { |
1197 | 0 | fz_html_box *this_box, *row_box; |
1198 | 0 | fz_css_style style2; |
1199 | 0 | fz_css_match match; |
1200 | |
|
1201 | 0 | row_box = find_table_cell_context(ctx, root_box); |
1202 | 0 | if (!row_box) |
1203 | 0 | return gen2_block(ctx, g, root_box, node, style); |
1204 | | |
1205 | 0 | fz_match_css(ctx, &match, root_match, g->css, node); |
1206 | 0 | fz_apply_css_style(ctx, g->set, style, &match); |
1207 | 0 | if (g->col_num < g->tab_styles.ncols) |
1208 | 0 | { |
1209 | | /* Make a local copy of the style, and overlay anything onto it from col. */ |
1210 | 0 | col_style *cs = &g->tab_styles.styles[g->col_num]; |
1211 | 0 | style2 = *style; |
1212 | 0 | style = &style2; |
1213 | 0 | if (cs->has_bg_col) |
1214 | 0 | style->background_color = cs->background_color; |
1215 | 0 | if (cs->has_border_col & 1) |
1216 | 0 | style->border_color[0] = cs->border_color[0]; |
1217 | 0 | if (cs->has_border_col & 2) |
1218 | 0 | style->border_color[1] = cs->border_color[1]; |
1219 | 0 | if (cs->has_border_col & 4) |
1220 | 0 | style->border_color[2] = cs->border_color[2]; |
1221 | 0 | if (cs->has_border_col & 8) |
1222 | 0 | style->border_color[3] = cs->border_color[3]; |
1223 | 0 | if (cs->has_border_width & 1) |
1224 | 0 | style->border_width[0] = cs->border_width[0]; |
1225 | 0 | if (cs->has_border_width & 2) |
1226 | 0 | style->border_width[1] = cs->border_width[1]; |
1227 | 0 | if (cs->has_border_width & 4) |
1228 | 0 | style->border_width[2] = cs->border_width[2]; |
1229 | 0 | if (cs->has_border_width & 8) |
1230 | 0 | style->border_width[3] = cs->border_width[3]; |
1231 | 0 | if (cs->has_visibility) |
1232 | 0 | style->visibility = cs->visibility; |
1233 | 0 | if (cs->has_width) |
1234 | 0 | style->width = cs->width; |
1235 | 0 | } |
1236 | 0 | apply_attributes_as_styles(ctx, style, node); |
1237 | 0 | g->col_num++; |
1238 | |
|
1239 | 0 | this_box = new_box(ctx, g, node, BOX_TABLE_CELL, style); |
1240 | 0 | append_box(ctx, row_box, this_box); |
1241 | 0 | return this_box; |
1242 | 0 | } |
1243 | | |
1244 | | static void gen2_image_common(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_image *img, int display, fz_css_style *style) |
1245 | 0 | { |
1246 | 0 | fz_html_box *img_block_box; |
1247 | 0 | fz_html_box *img_inline_box; |
1248 | |
|
1249 | 0 | if (display == DIS_INLINE || display == DIS_INLINE_BLOCK) |
1250 | 0 | { |
1251 | 0 | root_box = find_inline_context(ctx, g, root_box); |
1252 | 0 | img_inline_box = new_box(ctx, g, node, BOX_INLINE, style); |
1253 | 0 | append_box(ctx, root_box, img_inline_box); |
1254 | 0 | generate_image(ctx, img_inline_box, img, g); |
1255 | 0 | } |
1256 | 0 | else |
1257 | 0 | { |
1258 | 0 | root_box = find_block_context(ctx, root_box); |
1259 | 0 | img_block_box = new_box(ctx, g, node, BOX_BLOCK, style); |
1260 | 0 | append_box(ctx, root_box, img_block_box); |
1261 | |
|
1262 | 0 | root_box = find_inline_context(ctx, g, img_block_box); |
1263 | 0 | img_inline_box = new_box(ctx, g, NULL, BOX_INLINE, style); |
1264 | 0 | append_box(ctx, root_box, img_inline_box); |
1265 | 0 | generate_image(ctx, img_inline_box, img, g); |
1266 | 0 | } |
1267 | 0 | } |
1268 | | |
1269 | | static void gen2_image_html(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, int display, fz_css_style *style) |
1270 | 0 | { |
1271 | 0 | const char *src = fz_xml_att(node, "src"); |
1272 | 0 | if (src) |
1273 | 0 | { |
1274 | 0 | fz_css_style local_style = *style; |
1275 | 0 | fz_image *img; |
1276 | 0 | int w, h; |
1277 | 0 | const char *w_att = fz_xml_att(node, "width"); |
1278 | 0 | const char *h_att = fz_xml_att(node, "height"); |
1279 | |
|
1280 | 0 | if (w_att && (w = fz_atoi(w_att)) > 0) |
1281 | 0 | { |
1282 | 0 | local_style.width.value = w; |
1283 | 0 | local_style.width.unit = strchr(w_att, '%') ? N_PERCENT : N_LENGTH; |
1284 | 0 | } |
1285 | 0 | if (h_att && (h = fz_atoi(h_att)) > 0) |
1286 | 0 | { |
1287 | 0 | local_style.height.value = h; |
1288 | 0 | local_style.height.unit = strchr(h_att, '%') ? N_PERCENT : N_LENGTH; |
1289 | 0 | } |
1290 | |
|
1291 | 0 | img = load_html_image(ctx, g->zip, g->base_uri, src); |
1292 | 0 | gen2_image_common(ctx, g, root_box, node, img, display, &local_style); |
1293 | 0 | } |
1294 | 0 | } |
1295 | | |
1296 | | static void gen2_image_fb2(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, int display, fz_css_style *style) |
1297 | 0 | { |
1298 | 0 | const char *src = fz_xml_att(node, "l:href"); |
1299 | 0 | if (!src) |
1300 | 0 | src = fz_xml_att(node, "xlink:href"); |
1301 | 0 | if (src && src[0] == '#') |
1302 | 0 | { |
1303 | 0 | fz_image *img = fz_tree_lookup(ctx, g->images, src+1); |
1304 | 0 | gen2_image_common(ctx, g, root_box, node, fz_keep_image(ctx, img), display, style); |
1305 | 0 | } |
1306 | 0 | } |
1307 | | |
1308 | | static void gen2_image_svg(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, int display, fz_css_style *style) |
1309 | 0 | { |
1310 | 0 | fz_image *img = load_svg_image(ctx, g->zip, g->base_uri, g->xml, node); |
1311 | 0 | gen2_image_common(ctx, g, root_box, node, img, display, style); |
1312 | 0 | } |
1313 | | |
1314 | | static int get_heading_from_tag(fz_context *ctx, struct genstate *g, const char *tag) |
1315 | 0 | { |
1316 | 0 | if (tag[0] == 'h' && tag[1] != 0 && tag[2] == 0) |
1317 | 0 | { |
1318 | 0 | switch (tag[1]) |
1319 | 0 | { |
1320 | 0 | case '1': return 1; |
1321 | 0 | case '2': return 2; |
1322 | 0 | case '3': return 3; |
1323 | 0 | case '4': return 4; |
1324 | 0 | case '5': return 5; |
1325 | 0 | case '6': return 6; |
1326 | 0 | } |
1327 | 0 | } |
1328 | 0 | if (g->is_fb2) |
1329 | 0 | { |
1330 | 0 | if (!strcmp(tag, "title") || !strcmp(tag, "subtitle")) |
1331 | 0 | return fz_mini(g->section_depth, 6); |
1332 | 0 | } |
1333 | 0 | return 0; |
1334 | 0 | } |
1335 | | |
1336 | | static void gen2_tag(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, |
1337 | | fz_css_match *match, int display, fz_css_style *style) |
1338 | 0 | { |
1339 | 0 | fz_html_box *this_box = NULL; |
1340 | 0 | const char *tag; |
1341 | 0 | const char *lang_att; |
1342 | 0 | const char *dir_att; |
1343 | |
|
1344 | 0 | int save_markup_dir = g->markup_dir; |
1345 | 0 | int save_markup_lang = g->markup_lang; |
1346 | 0 | char *save_href = g->href; |
1347 | |
|
1348 | 0 | if (display == DIS_NONE) |
1349 | 0 | return; |
1350 | | |
1351 | 0 | tag = fz_xml_tag(node); |
1352 | |
|
1353 | 0 | if (style->direction == FZ_BIDI_UNSET) |
1354 | 0 | { |
1355 | 0 | dir_att = fz_xml_att(node, "dir"); |
1356 | 0 | if (dir_att) |
1357 | 0 | { |
1358 | 0 | if (!strcmp(dir_att, "auto")) |
1359 | 0 | g->markup_dir = FZ_BIDI_NEUTRAL; |
1360 | 0 | else if (!strcmp(dir_att, "rtl")) |
1361 | 0 | g->markup_dir = FZ_BIDI_RTL; |
1362 | 0 | else if (!strcmp(dir_att, "ltr")) |
1363 | 0 | g->markup_dir = FZ_BIDI_LTR; |
1364 | 0 | else |
1365 | 0 | g->markup_dir = FZ_BIDI_LTR; |
1366 | 0 | } |
1367 | 0 | } |
1368 | 0 | else |
1369 | 0 | { |
1370 | 0 | g->markup_dir = style->direction; |
1371 | 0 | } |
1372 | |
|
1373 | 0 | lang_att = fz_xml_att(node, "lang"); |
1374 | 0 | if (lang_att) |
1375 | 0 | g->markup_lang = fz_text_language_from_string(lang_att); |
1376 | |
|
1377 | 0 | switch (display) |
1378 | 0 | { |
1379 | 0 | case DIS_INLINE_BLOCK: |
1380 | | // TODO handle inline block as a flow node |
1381 | 0 | this_box = gen2_block(ctx, g, root_box, node, style); |
1382 | 0 | break; |
1383 | | |
1384 | 0 | case DIS_BLOCK: |
1385 | 0 | this_box = gen2_block(ctx, g, root_box, node, style); |
1386 | 0 | this_box->heading = get_heading_from_tag(ctx, g, tag); |
1387 | 0 | break; |
1388 | | |
1389 | 0 | case DIS_LIST_ITEM: |
1390 | 0 | this_box = gen2_block(ctx, g, root_box, node, style); |
1391 | 0 | this_box->list_item = ++g->list_counter; |
1392 | 0 | break; |
1393 | | |
1394 | | // TODO: https://www.w3.org/TR/CSS2/tables.html#anonymous-boxes |
1395 | | // |
1396 | | // The table generation code should insert and create anonymous boxes |
1397 | | // for any missing child/parent elements. |
1398 | | // |
1399 | | // MISSING CHILDREN: |
1400 | | // 1: Wrap consecutive BLOCK found in a TABLE in an anon TABLE_ROW. |
1401 | | // 2: Wrap consecutive BLOCK found in a TABLE_ROW in an anon TABLE_CELL. |
1402 | | // |
1403 | | // MISSING PARENTS: |
1404 | | // 1: Wrap consecutive TABLE_CELL found outside TABLE_ROW in an anon TABLE_ROW |
1405 | | // 2: Wrap consecutive TABLE_ROW found outside TABLE in an anon TABLE |
1406 | | // |
1407 | | // For now we ignore this and treat any such elements that are out of |
1408 | | // context as plain block elements. |
1409 | | |
1410 | 0 | case DIS_TABLE: |
1411 | 0 | this_box = gen2_table(ctx, g, root_box, node, style); |
1412 | 0 | break; |
1413 | 0 | case DIS_TABLE_GROUP: |
1414 | | // no box for table-row-group elements |
1415 | 0 | this_box = root_box; |
1416 | 0 | break; |
1417 | 0 | case DIS_TABLE_ROW: |
1418 | 0 | this_box = gen2_table_row(ctx, g, root_box, node, style); |
1419 | 0 | break; |
1420 | 0 | case DIS_TABLE_CELL: |
1421 | 0 | this_box = gen2_table_cell(ctx, g, root_box, node, style, match); |
1422 | 0 | break; |
1423 | | |
1424 | 0 | case DIS_TABLE_COLGROUP: |
1425 | 0 | gen2_colgroup(ctx, root_box); |
1426 | | // no box for colgroup elements. |
1427 | 0 | this_box = root_box; |
1428 | 0 | break; |
1429 | | |
1430 | 0 | case DIS_INLINE: |
1431 | 0 | default: |
1432 | 0 | this_box = gen2_inline(ctx, g, root_box, node, style); |
1433 | 0 | break; |
1434 | 0 | } |
1435 | | |
1436 | 0 | if (this_box == NULL) |
1437 | 0 | goto end; |
1438 | | |
1439 | 0 | if (tag && (!strcmp(tag, "ol") || !strcmp(tag, "ul") || !strcmp(tag, "dl"))) |
1440 | 0 | { |
1441 | 0 | int save_list_counter = g->list_counter; |
1442 | 0 | g->list_counter = 0; |
1443 | 0 | gen2_children(ctx, g, this_box, node, match); |
1444 | 0 | g->list_counter = save_list_counter; |
1445 | 0 | } |
1446 | 0 | else if (tag && !strcmp(tag, "section")) |
1447 | 0 | { |
1448 | 0 | int save_section_depth = g->section_depth; |
1449 | 0 | g->section_depth++; |
1450 | 0 | gen2_children(ctx, g, this_box, node, match); |
1451 | 0 | g->section_depth = save_section_depth; |
1452 | 0 | } |
1453 | 0 | else if (display == DIS_TABLE) |
1454 | 0 | { |
1455 | 0 | table_styles saved_styles = g->tab_styles; |
1456 | 0 | int saved_col_num = g->col_num; |
1457 | 0 | fz_try(ctx) |
1458 | 0 | { |
1459 | 0 | g->tab_styles.maxcols = 0; |
1460 | 0 | g->tab_styles.ncols = 0; |
1461 | 0 | g->tab_styles.styles = NULL; |
1462 | 0 | gen2_children(ctx, g, this_box, node, match); |
1463 | 0 | } |
1464 | 0 | fz_always(ctx) |
1465 | 0 | { |
1466 | 0 | drop_table_styles(ctx, &g->tab_styles); |
1467 | 0 | g->tab_styles = saved_styles; |
1468 | 0 | g->col_num = saved_col_num; |
1469 | 0 | } |
1470 | 0 | fz_catch(ctx) |
1471 | 0 | fz_rethrow(ctx); |
1472 | 0 | } |
1473 | 0 | else |
1474 | 0 | { |
1475 | 0 | gen2_children(ctx, g, this_box, node, match); |
1476 | 0 | } |
1477 | | |
1478 | 0 | end: |
1479 | 0 | g->markup_dir = save_markup_dir; |
1480 | 0 | g->markup_lang = save_markup_lang; |
1481 | 0 | g->href = save_href; |
1482 | 0 | } |
1483 | | |
1484 | | static void gen2_children(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *root_node, fz_css_match *root_match) |
1485 | 0 | { |
1486 | 0 | fz_xml *node; |
1487 | 0 | const char *tag; |
1488 | 0 | fz_css_match match; |
1489 | 0 | fz_css_style style; |
1490 | 0 | int display; |
1491 | |
|
1492 | 0 | for (node = fz_xml_down(root_node); node; node = fz_xml_next(node)) |
1493 | 0 | { |
1494 | 0 | tag = fz_xml_tag(node); |
1495 | 0 | if (tag) |
1496 | 0 | { |
1497 | 0 | fz_match_css(ctx, &match, root_match, g->css, node); |
1498 | 0 | fz_apply_css_style(ctx, g->set, &style, &match); |
1499 | 0 | apply_attributes_as_styles(ctx, &style, node); |
1500 | 0 | display = fz_get_css_match_display(&match); |
1501 | 0 | if (tag[0]=='b' && tag[1]=='r' && tag[2]==0) |
1502 | 0 | { |
1503 | 0 | gen2_break(ctx, g, root_box, node); |
1504 | 0 | } |
1505 | 0 | else if (tag[0]=='i' && tag[1]=='m' && tag[2]=='g' && tag[3]==0) |
1506 | 0 | { |
1507 | 0 | gen2_image_html(ctx, g, root_box, node, display, &style); |
1508 | 0 | } |
1509 | 0 | else if (g->is_fb2 && tag[0]=='i' && tag[1]=='m' && tag[2]=='a' && tag[3]=='g' && tag[4]=='e' && tag[5]==0) |
1510 | 0 | { |
1511 | 0 | gen2_image_fb2(ctx, g, root_box, node, display, &style); |
1512 | 0 | } |
1513 | 0 | else if (tag[0]=='s' && tag[1]=='v' && tag[2]=='g' && tag[3]==0) |
1514 | 0 | { |
1515 | 0 | gen2_image_svg(ctx, g, root_box, node, display, &style); |
1516 | 0 | } |
1517 | 0 | else if (tag[0]=='c' && tag[1]=='o' && tag[2]=='l' && tag[3]==0) |
1518 | 0 | { |
1519 | 0 | gen2_col(ctx, g, root_box, node, &match); |
1520 | 0 | } |
1521 | 0 | else |
1522 | 0 | { |
1523 | 0 | gen2_tag(ctx, g, root_box, node, &match, display, &style); |
1524 | 0 | } |
1525 | 0 | } |
1526 | 0 | else |
1527 | 0 | { |
1528 | 0 | gen2_text(ctx, g, root_box, node); |
1529 | 0 | } |
1530 | 0 | } |
1531 | 0 | } |
1532 | | |
1533 | | static void |
1534 | | html_load_css_link(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_css *css, fz_xml *root, const char *href) |
1535 | 0 | { |
1536 | 0 | char path[2048]; |
1537 | 0 | char css_base_uri[2048]; |
1538 | 0 | fz_buffer *buf; |
1539 | |
|
1540 | 0 | fz_var(buf); |
1541 | |
|
1542 | 0 | fz_strlcpy(path, base_uri, sizeof path); |
1543 | 0 | fz_strlcat(path, "/", sizeof path); |
1544 | 0 | fz_strlcat(path, href, sizeof path); |
1545 | 0 | fz_urldecode(path); |
1546 | 0 | fz_cleanname(path); |
1547 | |
|
1548 | 0 | fz_dirname(css_base_uri, path, sizeof css_base_uri); |
1549 | |
|
1550 | 0 | buf = NULL; |
1551 | 0 | fz_try(ctx) |
1552 | 0 | { |
1553 | 0 | buf = fz_read_archive_entry(ctx, zip, path); |
1554 | 0 | fz_parse_css(ctx, css, fz_string_from_buffer(ctx, buf), path); |
1555 | 0 | fz_add_css_font_faces(ctx, set, zip, css_base_uri, css); |
1556 | 0 | } |
1557 | 0 | fz_always(ctx) |
1558 | 0 | fz_drop_buffer(ctx, buf); |
1559 | 0 | fz_catch(ctx) |
1560 | 0 | { |
1561 | 0 | fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); |
1562 | 0 | fz_report_error(ctx); |
1563 | 0 | fz_warn(ctx, "ignoring stylesheet %s", path); |
1564 | 0 | } |
1565 | 0 | } |
1566 | | |
1567 | | static void |
1568 | | html_load_css(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_css *css, fz_xml *root) |
1569 | 0 | { |
1570 | 0 | fz_xml *html, *head, *node; |
1571 | |
|
1572 | 0 | html = fz_xml_find(root, "html"); |
1573 | 0 | head = fz_xml_find_down(html, "head"); |
1574 | 0 | for (node = fz_xml_down(head); node; node = fz_xml_next(node)) |
1575 | 0 | { |
1576 | 0 | if (fz_xml_is_tag(node, "link")) |
1577 | 0 | { |
1578 | 0 | char *rel = fz_xml_att(node, "rel"); |
1579 | 0 | if (rel && !fz_strcasecmp(rel, "stylesheet")) |
1580 | 0 | { |
1581 | 0 | char *type = fz_xml_att(node, "type"); |
1582 | 0 | if ((type && !strcmp(type, "text/css")) || !type) |
1583 | 0 | { |
1584 | 0 | char *href = fz_xml_att(node, "href"); |
1585 | 0 | if (href) |
1586 | 0 | { |
1587 | 0 | html_load_css_link(ctx, set, zip, base_uri, css, root, href); |
1588 | 0 | } |
1589 | 0 | } |
1590 | 0 | } |
1591 | 0 | } |
1592 | 0 | else if (fz_xml_is_tag(node, "style")) |
1593 | 0 | { |
1594 | 0 | char *s = fz_new_text_from_xml(ctx, node); |
1595 | 0 | fz_try(ctx) |
1596 | 0 | { |
1597 | 0 | fz_parse_css(ctx, css, s, "<style>"); |
1598 | 0 | fz_add_css_font_faces(ctx, set, zip, base_uri, css); |
1599 | 0 | } |
1600 | 0 | fz_always(ctx) |
1601 | 0 | fz_free(ctx, s); |
1602 | 0 | fz_catch(ctx) |
1603 | 0 | { |
1604 | 0 | fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); |
1605 | 0 | fz_report_error(ctx); |
1606 | 0 | fz_warn(ctx, "ignoring inline stylesheet"); |
1607 | 0 | } |
1608 | 0 | } |
1609 | 0 | } |
1610 | 0 | } |
1611 | | |
1612 | | static void |
1613 | | fb2_load_css(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_css *css, fz_xml *root) |
1614 | 0 | { |
1615 | 0 | fz_xml *fictionbook, *stylesheet; |
1616 | |
|
1617 | 0 | fictionbook = fz_xml_find(root, "FictionBook"); |
1618 | 0 | stylesheet = fz_xml_find_down(fictionbook, "stylesheet"); |
1619 | 0 | if (stylesheet) |
1620 | 0 | { |
1621 | 0 | char *s = fz_new_text_from_xml(ctx, stylesheet); |
1622 | 0 | fz_try(ctx) |
1623 | 0 | { |
1624 | 0 | fz_parse_css(ctx, css, s, "<stylesheet>"); |
1625 | 0 | fz_add_css_font_faces(ctx, set, zip, base_uri, css); |
1626 | 0 | } |
1627 | 0 | fz_catch(ctx) |
1628 | 0 | { |
1629 | 0 | fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); |
1630 | 0 | fz_report_error(ctx); |
1631 | 0 | fz_warn(ctx, "ignoring inline stylesheet"); |
1632 | 0 | } |
1633 | 0 | fz_free(ctx, s); |
1634 | 0 | } |
1635 | 0 | } |
1636 | | |
1637 | | static fz_tree * |
1638 | | load_fb2_images(fz_context *ctx, fz_xml *root) |
1639 | 0 | { |
1640 | 0 | fz_xml *fictionbook, *binary; |
1641 | 0 | fz_tree *images = NULL; |
1642 | |
|
1643 | 0 | fictionbook = fz_xml_find(root, "FictionBook"); |
1644 | 0 | for (binary = fz_xml_find_down(fictionbook, "binary"); binary; binary = fz_xml_find_next(binary, "binary")) |
1645 | 0 | { |
1646 | 0 | const char *id = fz_xml_att(binary, "id"); |
1647 | 0 | char *b64 = NULL; |
1648 | 0 | fz_buffer *buf = NULL; |
1649 | 0 | fz_image *img = NULL; |
1650 | |
|
1651 | 0 | fz_var(b64); |
1652 | 0 | fz_var(buf); |
1653 | |
|
1654 | 0 | if (id == NULL) |
1655 | 0 | { |
1656 | 0 | fz_warn(ctx, "Skipping image with no id"); |
1657 | 0 | continue; |
1658 | 0 | } |
1659 | | |
1660 | 0 | fz_try(ctx) |
1661 | 0 | { |
1662 | 0 | b64 = fz_new_text_from_xml(ctx, binary); |
1663 | 0 | buf = fz_new_buffer_from_base64(ctx, b64, strlen(b64)); |
1664 | 0 | img = fz_new_image_from_buffer(ctx, buf); |
1665 | 0 | } |
1666 | 0 | fz_always(ctx) |
1667 | 0 | { |
1668 | 0 | fz_drop_buffer(ctx, buf); |
1669 | 0 | fz_free(ctx, b64); |
1670 | 0 | } |
1671 | 0 | fz_catch(ctx) |
1672 | 0 | fz_rethrow(ctx); |
1673 | | |
1674 | 0 | images = fz_tree_insert(ctx, images, id, img); |
1675 | 0 | } |
1676 | | |
1677 | 0 | return images; |
1678 | 0 | } |
1679 | | |
/*
	A buffer of 32-bit values used while detecting flow directionality;
	the text of a run of flow nodes is gathered into it.
*/
typedef struct
{
	uint32_t *data;	/* the buffered values */
	size_t cap;	/* presumably the allocated capacity of data, in elements - confirm against the code that grows it */
	size_t len;	/* number of elements currently in use; reset to 0 before each gather */
} uni_buf;
1686 | | |
/*
	State handed to fragment_cb through its opaque argument.
*/
typedef struct
{
	fz_context *ctx;	/* context, passed on when a flow node has to be split */
	fz_pool *pool;		/* pool, passed on when a flow node has to be split */
	fz_html_flow *flow;	/* next flow node to be labelled; advanced by fragment_cb */
	uni_buf *buffer;	/* not used by fragment_cb; presumably the gathered text - confirm against the caller */
} bidi_data;
1694 | | |
1695 | | static void fragment_cb(const uint32_t *fragment, |
1696 | | size_t fragment_len, |
1697 | | int bidi_level, |
1698 | | int script, |
1699 | | void *arg) |
1700 | 0 | { |
1701 | 0 | bidi_data *data = (bidi_data *)arg; |
1702 | | |
1703 | | /* We are guaranteed that fragmentOffset will be at the beginning |
1704 | | * of flow. */ |
1705 | 0 | while (fragment_len > 0) |
1706 | 0 | { |
1707 | 0 | size_t len; |
1708 | |
|
1709 | 0 | if (data->flow->type == FLOW_SPACE) |
1710 | 0 | { |
1711 | 0 | len = 1; |
1712 | 0 | } |
1713 | 0 | else if (data->flow->type == FLOW_BREAK || data->flow->type == FLOW_SBREAK || |
1714 | 0 | data->flow->type == FLOW_SHYPHEN || data->flow->type == FLOW_ANCHOR) |
1715 | 0 | { |
1716 | 0 | len = 0; |
1717 | 0 | } |
1718 | 0 | else |
1719 | 0 | { |
1720 | | /* Must be text */ |
1721 | 0 | len = fz_utflen(data->flow->content.text); |
1722 | 0 | if (len > fragment_len) |
1723 | 0 | { |
1724 | | /* We need to split this flow box */ |
1725 | 0 | (void)fz_html_split_flow(data->ctx, data->pool, data->flow, fragment_len); |
1726 | 0 | len = fz_utflen(data->flow->content.text); |
1727 | 0 | } |
1728 | 0 | } |
1729 | | |
1730 | | /* This flow box is entirely contained within this fragment. */ |
1731 | 0 | data->flow->bidi_level = bidi_level; |
1732 | 0 | data->flow->script = script; |
1733 | 0 | data->flow = data->flow->next; |
1734 | 0 | fragment_len -= len; |
1735 | 0 | } |
1736 | 0 | } |
1737 | | |
1738 | | static fz_bidi_direction |
1739 | | detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_bidi_direction bidi_dir, fz_html_flow *flow) |
1740 | 0 | { |
1741 | 0 | fz_html_flow *end = flow; |
1742 | 0 | bidi_data data; |
1743 | |
|
1744 | 0 | while (end) |
1745 | 0 | { |
1746 | 0 | unsigned int level = end->bidi_level; |
1747 | | |
1748 | | /* Gather the text from the flow up into a single buffer (at |
1749 | | * least, as much of it as has the same direction markup). */ |
1750 | 0 | buffer->len = 0; |
1751 | 0 | while (end && (level & 1) == (end->bidi_level & 1)) |
1752 | 0 | { |
1753 | 0 | size_t len = 0; |
1754 | 0 | const char *text = ""; |
1755 | 0 | int broken = 0; |
1756 | |
|
1757 | 0 | switch (end->type) |
1758 | 0 | { |
1759 | 0 | case FLOW_WORD: |
1760 | 0 | len = fz_utflen(end->content.text); |
1761 | 0 | text = end->content.text; |
1762 | 0 | break; |
1763 | 0 | case FLOW_SPACE: |
1764 | 0 | len = 1; |
1765 | 0 | text = " "; |
1766 | 0 | break; |
1767 | 0 | case FLOW_SHYPHEN: |
1768 | 0 | case FLOW_SBREAK: |
1769 | 0 | break; |
1770 | 0 | case FLOW_BREAK: |
1771 | 0 | case FLOW_IMAGE: |
1772 | 0 | broken = 1; |
1773 | 0 | break; |
1774 | 0 | } |
1775 | | |
1776 | 0 | end = end->next; |
1777 | |
|
1778 | 0 | if (broken) |
1779 | 0 | break; |
1780 | | |
1781 | | /* Make sure the buffer is large enough */ |
1782 | 0 | if (buffer->len + len > buffer->cap) |
1783 | 0 | { |
1784 | 0 | size_t newcap = buffer->cap; |
1785 | 0 | if (newcap < 128) |
1786 | 0 | newcap = 128; /* Sensible small default */ |
1787 | |
|
1788 | 0 | while (newcap < buffer->len + len) |
1789 | 0 | newcap = (newcap * 3) / 2; |
1790 | |
|
1791 | 0 | buffer->data = fz_realloc_array(ctx, buffer->data, newcap, uint32_t); |
1792 | 0 | buffer->cap = newcap; |
1793 | 0 | } |
1794 | | |
1795 | | /* Expand the utf8 text into Unicode and store it in the buffer */ |
1796 | 0 | while (*text) |
1797 | 0 | { |
1798 | 0 | int rune; |
1799 | 0 | text += fz_chartorune(&rune, text); |
1800 | 0 | buffer->data[buffer->len++] = rune; |
1801 | 0 | } |
1802 | 0 | } |
1803 | | |
1804 | | /* Detect directionality for the buffer */ |
1805 | 0 | data.ctx = ctx; |
1806 | 0 | data.pool = pool; |
1807 | 0 | data.flow = flow; |
1808 | 0 | data.buffer = buffer; |
1809 | 0 | fz_bidi_fragment_text(ctx, buffer->data, buffer->len, &bidi_dir, fragment_cb, &data, 0 /* Flags */); |
1810 | 0 | flow = end; |
1811 | 0 | } |
1812 | 0 | return bidi_dir; |
1813 | 0 | } |
1814 | | |
1815 | | static void |
1816 | | detect_box_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_html_box *box) |
1817 | 0 | { |
1818 | 0 | while (box) |
1819 | 0 | { |
1820 | 0 | if (box->type == BOX_FLOW) |
1821 | 0 | box->markup_dir = detect_flow_directionality(ctx, pool, buffer, box->markup_dir, box->u.flow.head); |
1822 | 0 | detect_box_directionality(ctx, pool, buffer, box->down); |
1823 | 0 | box = box->next; |
1824 | 0 | } |
1825 | 0 | } |
1826 | | |
1827 | | static void |
1828 | | detect_directionality(fz_context *ctx, fz_pool *pool, fz_html_box *box) |
1829 | 0 | { |
1830 | 0 | uni_buf buffer = { NULL }; |
1831 | |
|
1832 | 0 | fz_try(ctx) |
1833 | 0 | detect_box_directionality(ctx, pool, &buffer, box); |
1834 | 0 | fz_always(ctx) |
1835 | 0 | fz_free(ctx, buffer.data); |
1836 | 0 | fz_catch(ctx) |
1837 | 0 | fz_rethrow(ctx); |
1838 | 0 | } |
1839 | | |
1840 | | static fz_xml_doc * |
1841 | | parse_to_xml(fz_context *ctx, fz_buffer *buf, int try_xml, int try_html5) |
1842 | 0 | { |
1843 | 0 | fz_xml_doc *xml; |
1844 | |
|
1845 | 0 | if (try_xml && try_html5) |
1846 | 0 | { |
1847 | 0 | fz_try(ctx) |
1848 | 0 | xml = fz_parse_xml(ctx, buf, 1); |
1849 | 0 | fz_catch(ctx) |
1850 | 0 | { |
1851 | 0 | if (fz_caught(ctx) == FZ_ERROR_SYNTAX) |
1852 | 0 | { |
1853 | 0 | fz_report_error(ctx); |
1854 | 0 | fz_warn(ctx, "syntax error in XHTML; retrying using HTML5 parser"); |
1855 | 0 | xml = fz_parse_xml_from_html5(ctx, buf); |
1856 | 0 | } |
1857 | 0 | else |
1858 | 0 | fz_rethrow(ctx); |
1859 | 0 | } |
1860 | 0 | } |
1861 | 0 | else if (try_xml) |
1862 | 0 | xml = fz_parse_xml(ctx, buf, 1); |
1863 | 0 | else |
1864 | 0 | { |
1865 | 0 | assert(try_html5); |
1866 | 0 | xml = fz_parse_xml_from_html5(ctx, buf); |
1867 | 0 | } |
1868 | | |
1869 | 0 | return xml; |
1870 | 0 | } |
1871 | | |
1872 | | static void move_background_color_style_up(fz_context *ctx, struct genstate *g, fz_html_box *root, fz_html_box *from) |
1873 | 0 | { |
1874 | 0 | fz_css_color transparent = { 0, 0, 0, 0 }; |
1875 | 0 | fz_css_style s1, s2; |
1876 | 0 | memcpy(&s1, root->style, sizeof s1); |
1877 | 0 | memcpy(&s2, from->style, sizeof s2); |
1878 | 0 | s1.background_color = s2.background_color; |
1879 | 0 | s2.background_color = transparent; |
1880 | 0 | root->style = fz_css_enlist(ctx, &s1, &g->styles, g->pool); |
1881 | 0 | from->style = fz_css_enlist(ctx, &s2, &g->styles, g->pool); |
1882 | 0 | } |
1883 | | |
1884 | | static void move_background_color_up(fz_context *ctx, struct genstate *g, fz_html_box *root) |
1885 | 0 | { |
1886 | 0 | fz_html_box *html, *body; |
1887 | |
|
1888 | 0 | if (root->style->background_color.a != 0) |
1889 | 0 | { |
1890 | 0 | return; |
1891 | 0 | } |
1892 | | |
1893 | 0 | html = root->down; |
1894 | 0 | if (html && !strcmp(html->tag, "html")) |
1895 | 0 | { |
1896 | 0 | if (html->style->background_color.a != 0) |
1897 | 0 | { |
1898 | 0 | move_background_color_style_up(ctx, g, root, html); |
1899 | 0 | return; |
1900 | 0 | } |
1901 | | |
1902 | 0 | body = html->down; |
1903 | 0 | if (body && !strcmp(body->tag, "body")) |
1904 | 0 | { |
1905 | 0 | if (body->style->background_color.a != 0) |
1906 | 0 | { |
1907 | 0 | move_background_color_style_up(ctx, g, root, body); |
1908 | 0 | return; |
1909 | 0 | } |
1910 | 0 | } |
1911 | 0 | } |
1912 | 0 | } |
1913 | | |
1914 | | static void |
1915 | | xml_to_boxes(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, const char *user_css, |
1916 | | fz_xml_doc *xml, fz_html_tree *tree, char **rtitle, int try_fictionbook, int is_mobi) |
1917 | 0 | { |
1918 | 0 | fz_xml *root, *node; |
1919 | 0 | char *title; |
1920 | |
|
1921 | 0 | fz_css_match root_match, match; |
1922 | 0 | struct genstate g = {0}; |
1923 | |
|
1924 | 0 | g.pool = NULL; |
1925 | 0 | g.set = set; |
1926 | 0 | g.zip = zip; |
1927 | 0 | g.images = NULL; |
1928 | 0 | g.xml = xml; |
1929 | 0 | g.is_fb2 = 0; |
1930 | 0 | g.base_uri = base_uri; |
1931 | 0 | g.css = NULL; |
1932 | 0 | g.at_bol = 0; |
1933 | 0 | g.emit_white = 0; |
1934 | 0 | g.last_brk_cls = UCDN_LINEBREAK_CLASS_OP; |
1935 | 0 | g.list_counter = 0; |
1936 | 0 | g.section_depth = 0; |
1937 | 0 | g.markup_dir = FZ_BIDI_LTR; |
1938 | 0 | g.markup_lang = FZ_LANG_UNSET; |
1939 | 0 | g.href = NULL; |
1940 | 0 | g.styles = NULL; |
1941 | |
|
1942 | 0 | if (rtitle) |
1943 | 0 | *rtitle = NULL; |
1944 | |
|
1945 | 0 | root = fz_xml_root(g.xml); |
1946 | 0 | g.css = fz_new_css(ctx); |
1947 | |
|
1948 | 0 | #ifndef NDEBUG |
1949 | 0 | if (fz_atoi(getenv("FZ_DEBUG_XML"))) |
1950 | 0 | fz_debug_xml(root, 0); |
1951 | 0 | #endif |
1952 | |
|
1953 | 0 | fz_try(ctx) |
1954 | 0 | { |
1955 | 0 | if (try_fictionbook && fz_xml_find(root, "FictionBook")) |
1956 | 0 | { |
1957 | 0 | g.is_fb2 = 1; |
1958 | 0 | fz_parse_css(ctx, g.css, fb2_default_css, "<default:fb2>"); |
1959 | 0 | if (fz_use_document_css(ctx)) |
1960 | 0 | fb2_load_css(ctx, g.set, g.zip, g.base_uri, g.css, root); |
1961 | 0 | g.images = load_fb2_images(ctx, root); |
1962 | 0 | } |
1963 | 0 | else if (is_mobi) |
1964 | 0 | { |
1965 | 0 | g.is_fb2 = 0; |
1966 | 0 | fz_parse_css(ctx, g.css, html_default_css, "<default:html>"); |
1967 | 0 | fz_parse_css(ctx, g.css, mobi_default_css, "<default:mobi>"); |
1968 | 0 | if (fz_use_document_css(ctx)) |
1969 | 0 | html_load_css(ctx, g.set, g.zip, g.base_uri, g.css, root); |
1970 | 0 | } |
1971 | 0 | else |
1972 | 0 | { |
1973 | 0 | g.is_fb2 = 0; |
1974 | 0 | fz_parse_css(ctx, g.css, html_default_css, "<default:html>"); |
1975 | 0 | if (fz_use_document_css(ctx)) |
1976 | 0 | html_load_css(ctx, g.set, g.zip, g.base_uri, g.css, root); |
1977 | 0 | } |
1978 | |
|
1979 | 0 | if (user_css) |
1980 | 0 | { |
1981 | 0 | fz_parse_css(ctx, g.css, user_css, "<user>"); |
1982 | 0 | fz_add_css_font_faces(ctx, g.set, g.zip, ".", g.css); |
1983 | 0 | } |
1984 | 0 | } |
1985 | 0 | fz_catch(ctx) |
1986 | 0 | { |
1987 | 0 | drop_table_styles(ctx, &g.tab_styles); |
1988 | 0 | fz_drop_tree(ctx, g.images, (void(*)(fz_context*,void*))fz_drop_image); |
1989 | 0 | fz_drop_css(ctx, g.css); |
1990 | 0 | fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); |
1991 | 0 | fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); |
1992 | 0 | fz_report_error(ctx); |
1993 | 0 | fz_warn(ctx, "ignoring styles"); |
1994 | 0 | g.css = fz_new_css(ctx); |
1995 | 0 | g.images = NULL; |
1996 | 0 | } |
1997 | |
|
1998 | 0 | #ifndef NDEBUG |
1999 | 0 | if (fz_atoi(getenv("FZ_DEBUG_CSS"))) |
2000 | 0 | fz_debug_css(ctx, g.css); |
2001 | 0 | #endif |
2002 | |
|
2003 | 0 | fz_try(ctx) |
2004 | 0 | { |
2005 | 0 | fz_css_style style; |
2006 | 0 | int display; |
2007 | |
|
2008 | 0 | fz_match_css_at_page(ctx, &root_match, g.css); |
2009 | 0 | fz_apply_css_style(ctx, g.set, &style, &root_match); |
2010 | |
|
2011 | 0 | g.pool = tree->pool; |
2012 | 0 | if (style.direction != FZ_BIDI_UNSET) |
2013 | 0 | g.markup_dir = style.direction; |
2014 | 0 | g.markup_lang = FZ_LANG_UNSET; |
2015 | | |
2016 | | // Create root node |
2017 | 0 | tree->root = new_box(ctx, &g, NULL, BOX_BLOCK, &style); |
2018 | | // TODO: transfer page margins out of this hacky box |
2019 | |
|
2020 | 0 | tree->root->tag = ":root"; |
2021 | 0 | tree->root->s.layout.em = 0; |
2022 | 0 | tree->root->s.layout.x = 0; |
2023 | 0 | tree->root->s.layout.y = 0; |
2024 | 0 | tree->root->s.layout.w = 0; |
2025 | 0 | tree->root->s.layout.b = 0; |
2026 | | |
2027 | | // Create document node (html). |
2028 | 0 | fz_match_css(ctx, &match, &root_match, g.css, root); |
2029 | 0 | fz_apply_css_style(ctx, g.set, &style, &match); |
2030 | 0 | display = fz_get_css_match_display(&match); |
2031 | 0 | gen2_tag(ctx, &g, tree->root, root, &match, display, &style); |
2032 | |
|
2033 | 0 | detect_directionality(ctx, g.pool, tree->root); |
2034 | |
|
2035 | 0 | if (g.is_fb2) |
2036 | 0 | { |
2037 | 0 | node = fz_xml_find(root, "FictionBook"); |
2038 | 0 | node = fz_xml_find_down(node, "description"); |
2039 | 0 | node = fz_xml_find_down(node, "title-info"); |
2040 | 0 | node = fz_xml_find_down(node, "book-title"); |
2041 | 0 | if (rtitle) |
2042 | 0 | { |
2043 | 0 | title = fz_xml_text(fz_xml_down(node)); |
2044 | 0 | if (title) |
2045 | 0 | *rtitle = fz_pool_strdup(ctx, g.pool, title); |
2046 | 0 | } |
2047 | 0 | } |
2048 | 0 | else |
2049 | 0 | { |
2050 | 0 | node = fz_xml_find(root, "html"); |
2051 | 0 | node = fz_xml_find_down(node, "head"); |
2052 | 0 | node = fz_xml_find_down(node, "title"); |
2053 | 0 | if (rtitle) |
2054 | 0 | { |
2055 | 0 | title = fz_xml_text(fz_xml_down(node)); |
2056 | 0 | if (title) |
2057 | 0 | *rtitle = fz_pool_strdup(ctx, g.pool, title); |
2058 | 0 | } |
2059 | | |
2060 | | // Move html or body background-color to :root. |
2061 | 0 | move_background_color_up(ctx, &g, tree->root); |
2062 | 0 | } |
2063 | 0 | } |
2064 | 0 | fz_always(ctx) |
2065 | 0 | { |
2066 | 0 | drop_table_styles(ctx, &g.tab_styles); |
2067 | 0 | fz_drop_tree(ctx, g.images, (void(*)(fz_context*,void*))fz_drop_image); |
2068 | 0 | fz_drop_css(ctx, g.css); |
2069 | 0 | } |
2070 | 0 | fz_catch(ctx) |
2071 | 0 | { |
2072 | 0 | if (rtitle) |
2073 | 0 | { |
2074 | 0 | fz_free(ctx, *rtitle); |
2075 | 0 | *rtitle = NULL; |
2076 | 0 | } |
2077 | 0 | fz_rethrow(ctx); |
2078 | 0 | } |
2079 | 0 | } |
2080 | | |
/* CSS font-size values used for the MOBI <font size="..."> attribute.
 * patch_mobi_html maps the absolute sizes "1".."7" to entries 0..6 and
 * the relative sizes "+N"/"-N" to entries in this same table. */
static const char *mobi_font_size[7] = {
	"0.67em",
	"0.83em",
	"1em",
	"1.17em",
	"1.33em",
	"1.5em",
	"1.67em",
};
2090 | | |
2091 | | static void |
2092 | | patch_mobi_html(fz_context *ctx, fz_pool *pool, fz_xml *node) |
2093 | 0 | { |
2094 | 0 | fz_xml *down; |
2095 | 0 | char buf[500]; |
2096 | 0 | while (node) |
2097 | 0 | { |
2098 | 0 | char *tag = fz_xml_tag(node); |
2099 | 0 | if (tag) |
2100 | 0 | { |
2101 | | // Read MOBI attributes, convert to inline CSS style |
2102 | 0 | if (!strcmp(tag, "font")) |
2103 | 0 | { |
2104 | 0 | const char *size = fz_xml_att(node, "size"); |
2105 | 0 | if (size) |
2106 | 0 | { |
2107 | 0 | if (!strcmp(size, "1")) size = mobi_font_size[0]; |
2108 | 0 | else if (!strcmp(size, "2")) size = mobi_font_size[1]; |
2109 | 0 | else if (!strcmp(size, "3")) size = mobi_font_size[2]; |
2110 | 0 | else if (!strcmp(size, "4")) size = mobi_font_size[3]; |
2111 | 0 | else if (!strcmp(size, "5")) size = mobi_font_size[4]; |
2112 | 0 | else if (!strcmp(size, "6")) size = mobi_font_size[5]; |
2113 | 0 | else if (!strcmp(size, "7")) size = mobi_font_size[6]; |
2114 | 0 | else if (!strcmp(size, "+1")) size = mobi_font_size[3]; |
2115 | 0 | else if (!strcmp(size, "+2")) size = mobi_font_size[4]; |
2116 | 0 | else if (!strcmp(size, "+3")) size = mobi_font_size[5]; |
2117 | 0 | else if (!strcmp(size, "+4")) size = mobi_font_size[6]; |
2118 | 0 | else if (!strcmp(size, "+5")) size = mobi_font_size[6]; |
2119 | 0 | else if (!strcmp(size, "+6")) size = mobi_font_size[6]; |
2120 | 0 | else if (!strcmp(size, "-1")) size = mobi_font_size[1]; |
2121 | 0 | else if (!strcmp(size, "-2")) size = mobi_font_size[0]; |
2122 | 0 | else if (!strcmp(size, "-3")) size = mobi_font_size[0]; |
2123 | 0 | else if (!strcmp(size, "-4")) size = mobi_font_size[0]; |
2124 | 0 | else if (!strcmp(size, "-5")) size = mobi_font_size[0]; |
2125 | 0 | else if (!strcmp(size, "-6")) size = mobi_font_size[0]; |
2126 | 0 | fz_snprintf(buf, sizeof buf, "font-size:%s", size); |
2127 | 0 | fz_xml_add_att(ctx, pool, node, "style", buf); |
2128 | 0 | } |
2129 | 0 | } |
2130 | 0 | else |
2131 | 0 | { |
2132 | 0 | char *height = fz_xml_att(node, "height"); |
2133 | 0 | char *width = fz_xml_att(node, "width"); |
2134 | 0 | char *align = fz_xml_att(node, "align"); |
2135 | 0 | if (height || width || align) |
2136 | 0 | { |
2137 | 0 | buf[0] = 0; |
2138 | 0 | if (height) |
2139 | 0 | { |
2140 | 0 | fz_strlcat(buf, "margin-top:", sizeof buf); |
2141 | 0 | fz_strlcat(buf, height, sizeof buf); |
2142 | 0 | fz_strlcat(buf, ";", sizeof buf); |
2143 | 0 | } |
2144 | 0 | if (width) |
2145 | 0 | { |
2146 | 0 | fz_strlcat(buf, "text-indent:", sizeof buf); |
2147 | 0 | fz_strlcat(buf, width, sizeof buf); |
2148 | 0 | fz_strlcat(buf, ";", sizeof buf); |
2149 | 0 | } |
2150 | 0 | if (align) |
2151 | 0 | { |
2152 | 0 | fz_strlcat(buf, "text-align:", sizeof buf); |
2153 | 0 | fz_strlcat(buf, align, sizeof buf); |
2154 | 0 | fz_strlcat(buf, ";", sizeof buf); |
2155 | 0 | } |
2156 | 0 | fz_xml_add_att(ctx, pool, node, "style", buf); |
2157 | 0 | } |
2158 | 0 | if (!strcmp(tag, "img")) |
2159 | 0 | { |
2160 | 0 | char *recindex = fz_xml_att(node, "recindex"); |
2161 | 0 | if (recindex) |
2162 | 0 | fz_xml_add_att(ctx, pool, node, "src", recindex); |
2163 | 0 | } |
2164 | 0 | } |
2165 | 0 | } |
2166 | |
|
2167 | 0 | down = fz_xml_down(node); |
2168 | 0 | if (down) |
2169 | 0 | patch_mobi_html(ctx, pool, down); |
2170 | |
|
2171 | 0 | node = fz_xml_next(node); |
2172 | 0 | } |
2173 | 0 | } |
2174 | | |
2175 | | static void |
2176 | | fz_parse_html_tree(fz_context *ctx, |
2177 | | fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css, |
2178 | | int try_xml, int try_html5, fz_html_tree *tree, char **rtitle, int try_fictionbook, int patch_mobi) |
2179 | 0 | { |
2180 | 0 | fz_xml_doc *xml; |
2181 | |
|
2182 | 0 | if (rtitle) |
2183 | 0 | *rtitle = NULL; |
2184 | |
|
2185 | 0 | xml = parse_to_xml(ctx, buf, try_xml, try_html5); |
2186 | |
|
2187 | 0 | if (patch_mobi) |
2188 | 0 | patch_mobi_html(ctx, xml->u.doc.pool, fz_xml_root(xml)); |
2189 | |
|
2190 | 0 | fz_try(ctx) |
2191 | 0 | xml_to_boxes(ctx, set, zip, base_uri, user_css, xml, tree, rtitle, try_fictionbook, patch_mobi); |
2192 | 0 | fz_always(ctx) |
2193 | 0 | fz_drop_xml(ctx, xml); |
2194 | 0 | fz_catch(ctx) |
2195 | 0 | fz_rethrow(ctx); |
2196 | 0 | } |
2197 | | |
/* Allocate a TYPE via fz_new_html_tree_of_size using sizeof(TYPE) and the
 * DROP function, label the block with the type name through Memento_label,
 * and cast the result to TYPE *. */
#define fz_new_derived_html_tree(CTX, TYPE, DROP) \
	((TYPE *)Memento_label(fz_new_html_tree_of_size(CTX, sizeof(TYPE), DROP), #TYPE))
2200 | | |
2201 | | static fz_html_tree * |
2202 | | fz_new_html_tree_of_size(fz_context *ctx, size_t size, fz_store_drop_fn *drop) |
2203 | 0 | { |
2204 | 0 | fz_pool *pool = fz_new_pool(ctx); |
2205 | 0 | fz_html_tree *tree; |
2206 | |
|
2207 | 0 | fz_try(ctx) |
2208 | 0 | { |
2209 | 0 | tree = fz_pool_alloc(ctx, pool, size); |
2210 | 0 | FZ_INIT_STORABLE(tree, 1, drop); |
2211 | 0 | tree->pool = pool; |
2212 | 0 | } |
2213 | 0 | fz_catch(ctx) |
2214 | 0 | { |
2215 | 0 | fz_drop_pool(ctx, pool); |
2216 | 0 | fz_rethrow(ctx); |
2217 | 0 | } |
2218 | | |
2219 | 0 | return tree; |
2220 | 0 | } |
2221 | | |
2222 | | fz_html * |
2223 | | fz_parse_html(fz_context *ctx, |
2224 | | fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css, |
2225 | | int try_xml, int try_html5, int patch_mobi) |
2226 | 0 | { |
2227 | 0 | fz_html *html = fz_new_derived_html_tree(ctx, fz_html, fz_drop_html_imp); |
2228 | |
|
2229 | 0 | html->layout_w = 0; |
2230 | 0 | html->layout_h = 0; |
2231 | 0 | html->layout_em = 0; |
2232 | |
|
2233 | 0 | fz_try(ctx) |
2234 | 0 | fz_parse_html_tree(ctx, set, zip, base_uri, buf, user_css, try_xml, try_html5, &html->tree, &html->title, 1, patch_mobi); |
2235 | 0 | fz_catch(ctx) |
2236 | 0 | { |
2237 | 0 | fz_drop_html(ctx, html); |
2238 | 0 | fz_rethrow(ctx); |
2239 | 0 | } |
2240 | | |
2241 | 0 | return html; |
2242 | 0 | } |
2243 | | |
/* Bookkeeping for temporarily redirecting warnings into a buffer. */
typedef struct
{
	int saved; /* non-zero once redirect_warnings_to_buffer has run; restore_warnings does nothing otherwise */
	fz_warning_cb *old; /* warning callback that was installed before the redirect */
	void *arg; /* argument belonging to the old callback */
	fz_buffer *buffer; /* buffer that warn_to_buffer appends messages to */
	fz_context *ctx; /* context used by warn_to_buffer */
} warning_save;
2252 | | |
2253 | | static void |
2254 | | warn_to_buffer(void *user, const char *message) |
2255 | 0 | { |
2256 | 0 | warning_save *save = (warning_save *)user; |
2257 | 0 | fz_context *ctx = save->ctx; |
2258 | |
|
2259 | 0 | fz_try(ctx) |
2260 | 0 | { |
2261 | 0 | fz_append_string(ctx, save->buffer, message); |
2262 | 0 | fz_append_byte(ctx, save->buffer, '\n'); |
2263 | 0 | } |
2264 | 0 | fz_catch(ctx) |
2265 | 0 | { |
2266 | | /* Silently swallow the error. */ |
2267 | 0 | fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); |
2268 | 0 | fz_report_error(ctx); |
2269 | 0 | } |
2270 | 0 | } |
2271 | | |
2272 | | static void |
2273 | | redirect_warnings_to_buffer(fz_context *ctx, fz_buffer *buf, warning_save *save) |
2274 | 0 | { |
2275 | 0 | save->saved = 1; |
2276 | 0 | save->old = fz_warning_callback(ctx, &save->arg); |
2277 | 0 | save->buffer = buf; |
2278 | 0 | save->ctx = ctx; |
2279 | |
|
2280 | 0 | fz_flush_warnings(ctx); |
2281 | 0 | fz_set_warning_callback(ctx, warn_to_buffer, save); |
2282 | 0 | } |
2283 | | |
2284 | | static void |
2285 | | restore_warnings(fz_context *ctx, warning_save *save) |
2286 | 0 | { |
2287 | 0 | if (!save->saved) |
2288 | 0 | return; |
2289 | | |
2290 | 0 | fz_flush_warnings(ctx); |
2291 | 0 | fz_set_warning_callback(ctx, save->old, save->arg); |
2292 | 0 | } |
2293 | | |
2294 | | fz_story * |
2295 | | fz_new_story(fz_context *ctx, fz_buffer *buf, const char *user_css, float em, fz_archive *zip) |
2296 | 0 | { |
2297 | 0 | fz_story *story = fz_new_derived_html_tree(ctx, fz_story, fz_drop_story_imp); |
2298 | 0 | warning_save saved = { 0 }; |
2299 | 0 | fz_buffer *local_buffer = NULL; |
2300 | |
|
2301 | 0 | if (buf == NULL) |
2302 | 0 | { |
2303 | 0 | local_buffer = fz_new_buffer(ctx, 0); |
2304 | 0 | buf = local_buffer; |
2305 | 0 | } |
2306 | |
|
2307 | 0 | fz_var(local_buffer); |
2308 | 0 | fz_var(saved); |
2309 | |
|
2310 | 0 | fz_try(ctx) |
2311 | 0 | { |
2312 | 0 | story->zip = fz_keep_archive(ctx, zip); |
2313 | 0 | story->font_set = fz_new_html_font_set(ctx); |
2314 | 0 | story->em = em; |
2315 | 0 | story->user_css = user_css ? fz_strdup(ctx, user_css) : NULL; |
2316 | 0 | story->warnings = fz_new_buffer(ctx, 128); |
2317 | 0 | redirect_warnings_to_buffer(ctx, story->warnings, &saved); |
2318 | 0 | story->dom = parse_to_xml(ctx, buf, 0, 1); |
2319 | 0 | } |
2320 | 0 | fz_always(ctx) |
2321 | 0 | { |
2322 | 0 | restore_warnings(ctx, &saved); |
2323 | 0 | fz_drop_buffer(ctx, local_buffer); |
2324 | 0 | } |
2325 | 0 | fz_catch(ctx) |
2326 | 0 | { |
2327 | 0 | fz_drop_html_tree(ctx, &story->tree); |
2328 | 0 | fz_rethrow(ctx); |
2329 | 0 | } |
2330 | | |
2331 | 0 | return story; |
2332 | 0 | } |
2333 | | |
2334 | | fz_html * |
2335 | | fz_parse_xhtml(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css) |
2336 | 0 | { |
2337 | | /* try as XML first, fall back to HTML5 */ |
2338 | 0 | return fz_parse_html(ctx, set, zip, base_uri, buf, user_css, 1, 1, 0); |
2339 | 0 | } |
2340 | | |
/* Write level tab characters to stdout; nothing for level <= 0. */
static void indent(int level)
{
	int i;

	for (i = 0; i < level; ++i)
		putchar('\t');
}
2346 | | |
2347 | | static void |
2348 | | fz_debug_html_flow(fz_context *ctx, fz_html_flow *flow, int level) |
2349 | 0 | { |
2350 | 0 | fz_html_box *sbox = NULL; |
2351 | 0 | while (flow) |
2352 | 0 | { |
2353 | 0 | if (flow->box != sbox) { |
2354 | 0 | sbox = flow->box; |
2355 | 0 | indent(level); |
2356 | 0 | #ifndef NDEBUG |
2357 | 0 | printf("@style <%s> em=%g font='%s'", sbox->tag, sbox->s.layout.em, fz_font_name(ctx, sbox->style->font)); |
2358 | | #else |
2359 | | printf("@style em=%g font='%s'", sbox->s.layout.em, fz_font_name(ctx, sbox->style->font)); |
2360 | | #endif |
2361 | 0 | if (fz_font_is_serif(ctx, sbox->style->font)) |
2362 | 0 | printf(" serif"); |
2363 | 0 | else |
2364 | 0 | printf(" sans"); |
2365 | 0 | if (fz_font_is_monospaced(ctx, sbox->style->font)) |
2366 | 0 | printf(" monospaced"); |
2367 | 0 | if (fz_font_is_bold(ctx, sbox->style->font)) |
2368 | 0 | printf(" bold"); |
2369 | 0 | if (fz_font_is_italic(ctx, sbox->style->font)) |
2370 | 0 | printf(" italic"); |
2371 | 0 | if (sbox->style->small_caps) |
2372 | 0 | printf(" small-caps"); |
2373 | 0 | printf("\n"); |
2374 | 0 | } |
2375 | |
|
2376 | 0 | indent(level); |
2377 | 0 | switch (flow->type) { |
2378 | 0 | case FLOW_WORD: printf("word "); break; |
2379 | 0 | case FLOW_SPACE: printf("space"); break; |
2380 | 0 | case FLOW_SBREAK: printf("sbrk "); break; |
2381 | 0 | case FLOW_SHYPHEN: printf("shy "); break; |
2382 | 0 | case FLOW_BREAK: printf("break"); break; |
2383 | 0 | case FLOW_IMAGE: printf("image"); break; |
2384 | 0 | case FLOW_ANCHOR: printf("anchor"); break; |
2385 | 0 | } |
2386 | 0 | printf(" script=%d", flow->script); |
2387 | | // printf(" y=%g x=%g w=%g", flow->y, flow->x, flow->w); |
2388 | 0 | if (flow->type == FLOW_IMAGE) |
2389 | 0 | printf(" h=%g", flow->h); |
2390 | 0 | if (flow->type == FLOW_WORD) |
2391 | 0 | printf(" text='%s'", flow->content.text); |
2392 | 0 | printf("\n"); |
2393 | 0 | if (flow->breaks_line) { |
2394 | 0 | indent(level); |
2395 | 0 | printf("*\n"); |
2396 | 0 | } |
2397 | |
|
2398 | 0 | flow = flow->next; |
2399 | 0 | } |
2400 | 0 | } |
2401 | | |
2402 | | fz_structure fz_html_tag_to_structure(const char *tag) |
2403 | 0 | { |
2404 | 0 | if (!strcmp(tag, "body")) return FZ_STRUCTURE_DOCUMENT; |
2405 | 0 | if (!strcmp(tag, "div")) return FZ_STRUCTURE_DIV; |
2406 | 0 | if (!strcmp(tag, "span")) return FZ_STRUCTURE_SPAN; |
2407 | 0 | if (!strcmp(tag, "blockquote")) return FZ_STRUCTURE_BLOCKQUOTE; |
2408 | 0 | if (!strcmp(tag, "p")) return FZ_STRUCTURE_P; |
2409 | 0 | if (!strcmp(tag, "h1")) return FZ_STRUCTURE_H1; |
2410 | 0 | if (!strcmp(tag, "h2")) return FZ_STRUCTURE_H2; |
2411 | 0 | if (!strcmp(tag, "h3")) return FZ_STRUCTURE_H3; |
2412 | 0 | if (!strcmp(tag, "h4")) return FZ_STRUCTURE_H4; |
2413 | 0 | if (!strcmp(tag, "h5")) return FZ_STRUCTURE_H5; |
2414 | 0 | if (!strcmp(tag, "h6")) return FZ_STRUCTURE_H6; |
2415 | 0 | if (!strcmp(tag, "ol")) return FZ_STRUCTURE_LIST; |
2416 | 0 | if (!strcmp(tag, "ul")) return FZ_STRUCTURE_LIST; |
2417 | 0 | if (!strcmp(tag, "dl")) return FZ_STRUCTURE_LIST; |
2418 | 0 | if (!strcmp(tag, "li")) return FZ_STRUCTURE_LISTITEM; |
2419 | 0 | if (!strcmp(tag, "table")) return FZ_STRUCTURE_TABLE; |
2420 | 0 | if (!strcmp(tag, "tr")) return FZ_STRUCTURE_TR; |
2421 | 0 | if (!strcmp(tag, "th")) return FZ_STRUCTURE_TH; |
2422 | 0 | if (!strcmp(tag, "td")) return FZ_STRUCTURE_TD; |
2423 | 0 | if (!strcmp(tag, "thead")) return FZ_STRUCTURE_THEAD; |
2424 | 0 | if (!strcmp(tag, "tbody")) return FZ_STRUCTURE_TBODY; |
2425 | 0 | if (!strcmp(tag, "tfoot")) return FZ_STRUCTURE_TFOOT; |
2426 | 0 | return FZ_STRUCTURE_INVALID; |
2427 | 0 | } |
2428 | | |
2429 | | static void fz_debug_css_number(int level, const char *label, fz_css_number number) |
2430 | 0 | { |
2431 | 0 | if (number.unit == N_UNDEFINED || number.unit == N_AUTO) |
2432 | 0 | return; |
2433 | 0 | indent(level+1); |
2434 | 0 | printf(">%s: ", label); |
2435 | 0 | switch (number.unit) { |
2436 | 0 | default: |
2437 | 0 | case N_NUMBER: printf("%g (num)\n", number.value); break; |
2438 | 0 | case N_LENGTH: printf("%g (len)\n", number.value); break; |
2439 | 0 | case N_SCALE: printf("%g (scale)\n", number.value); break; |
2440 | 0 | case N_PERCENT: printf("%g%%\n", number.value * 0.01f); break; |
2441 | 0 | } |
2442 | 0 | } |
2443 | | |
2444 | | static void |
2445 | | fz_debug_html_box(fz_context *ctx, fz_html_box *box, int level) |
2446 | 0 | { |
2447 | 0 | while (box) |
2448 | 0 | { |
2449 | 0 | indent(level); |
2450 | 0 | printf("box "); |
2451 | | #ifdef DEBUG_HTML_SEQ |
2452 | | printf("seq=%d ", box->seq); |
2453 | | #endif |
2454 | 0 | switch (box->type) { |
2455 | 0 | case BOX_BLOCK: printf("block"); break; |
2456 | 0 | case BOX_FLOW: printf("flow"); break; |
2457 | 0 | case BOX_INLINE: printf("inline"); break; |
2458 | 0 | case BOX_TABLE: printf("table"); break; |
2459 | 0 | case BOX_TABLE_ROW: printf("table-row"); break; |
2460 | 0 | case BOX_TABLE_CELL: printf("table-cell"); break; |
2461 | 0 | } |
2462 | | |
2463 | 0 | printf(" <%s>", box->tag); |
2464 | | // printf(" em=%g", box->em); |
2465 | | // printf(" x=%g y=%g w=%g b=%g", box->x, box->y, box->w, box->b); |
2466 | |
|
2467 | 0 | if (box->markup_dir == FZ_BIDI_RTL) |
2468 | 0 | printf(" rtl"); |
2469 | 0 | if (box->is_first_flow) |
2470 | 0 | printf(" is-first-flow"); |
2471 | 0 | if (box->list_item) |
2472 | 0 | printf(" list=%d", box->list_item); |
2473 | 0 | if (box->id) |
2474 | 0 | printf(" id=(%s)", box->id); |
2475 | 0 | if (box->href) |
2476 | 0 | printf(" href=(%s)", box->href); |
2477 | 0 | printf("\n"); |
2478 | |
|
2479 | 0 | if (box->type == BOX_BLOCK || box->type == BOX_TABLE || box->type == BOX_TABLE_CELL) { |
2480 | 0 | if (box->style->background_color.a != 0) |
2481 | 0 | { |
2482 | 0 | indent(level+1); |
2483 | 0 | printf(">background-color=#%02x%02x%02x%02x\n", |
2484 | 0 | box->style->background_color.a, |
2485 | 0 | box->style->background_color.r, |
2486 | 0 | box->style->background_color.g, |
2487 | 0 | box->style->background_color.b); |
2488 | 0 | } |
2489 | 0 | if (box->style->position != POS_STATIC) |
2490 | 0 | { |
2491 | 0 | indent(level+1); |
2492 | 0 | printf(">position: %s\n", box->style->position == POS_RELATIVE ? "relative" : |
2493 | 0 | box->style->position == POS_FIXED ? "fixed" : "absolute"); |
2494 | 0 | } |
2495 | 0 | fz_debug_css_number(level, "width", box->style->width); |
2496 | 0 | fz_debug_css_number(level, "height", box->style->height); |
2497 | 0 | if (box->u.block.margin[0] != 0 || box->u.block.margin[1] != 0 || box->u.block.margin[2] != 0 || box->u.block.margin[3] != 0) |
2498 | 0 | { |
2499 | 0 | indent(level+1); |
2500 | 0 | printf(">margin=(%g %g %g %g)\n", box->u.block.margin[0], box->u.block.margin[1], box->u.block.margin[2], box->u.block.margin[3]); |
2501 | 0 | } |
2502 | 0 | if (box->u.block.border[0] != 0 || box->u.block.border[1] != 0 || box->u.block.border[2] != 0 || box->u.block.border[3] != 0) |
2503 | 0 | { |
2504 | 0 | indent(level+1); |
2505 | 0 | printf(">border=(%g %g %g %g) #%02x%02x%02x%02x\n", |
2506 | 0 | box->u.block.border[0], box->u.block.border[1], box->u.block.border[2], box->u.block.border[3], |
2507 | 0 | box->style->border_color->a, box->style->border_color->r, box->style->border_color->g, box->style->border_color->b); |
2508 | 0 | } |
2509 | 0 | if (box->u.block.padding[0] != 0 || box->u.block.padding[1] != 0 || box->u.block.padding[2] != 0 || box->u.block.padding[3] != 0) |
2510 | 0 | { |
2511 | 0 | indent(level+1); |
2512 | 0 | printf(">padding=(%g %g %g %g)\n", box->u.block.padding[0], box->u.block.padding[1], box->u.block.padding[2], box->u.block.padding[3]); |
2513 | 0 | } |
2514 | 0 | } |
2515 | 0 | indent(level+1); |
2516 | 0 | printf(">layout=(%g %g)->(%g %g)\n", box->s.layout.x, box->s.layout.y, box->s.layout.w + box->s.layout.x, box->s.layout.b); |
2517 | |
|
2518 | 0 | if (box->down) |
2519 | 0 | fz_debug_html_box(ctx, box->down, level + 1); |
2520 | 0 | if (box->type == BOX_FLOW) { |
2521 | 0 | indent(level+1); |
2522 | 0 | printf("flow\n"); |
2523 | 0 | fz_debug_html_flow(ctx, box->u.flow.head, level + 2); |
2524 | 0 | } |
2525 | |
|
2526 | 0 | box = box->next; |
2527 | 0 | } |
2528 | 0 | } |
2529 | | |
2530 | | void |
2531 | | fz_debug_html(fz_context *ctx, fz_html_box *box) |
2532 | 0 | { |
2533 | 0 | fz_debug_html_box(ctx, box, 0); |
2534 | 0 | } |
2535 | | |
2536 | | static size_t |
2537 | | fz_html_size(fz_context *ctx, fz_html *html) |
2538 | 0 | { |
2539 | 0 | return html ? fz_pool_size(ctx, html->tree.pool) : 0; |
2540 | 0 | } |
2541 | | |
/* Magic to make html storable. */
/* Store key identifying a parsed html by owning document and chapter. */
typedef struct {
	int refs; /* reference count, managed by fz_keep_html_key / fz_drop_html_key */
	void *doc; /* document pointer; compared by address only */
	int chapter_num; /* chapter number within doc */
} fz_html_key;
2548 | | |
2549 | | static int |
2550 | | fz_make_hash_html_key(fz_context *ctx, fz_store_hash *hash, void *key_) |
2551 | 0 | { |
2552 | 0 | fz_html_key *key = (fz_html_key *)key_; |
2553 | 0 | hash->u.pi.ptr = key->doc; |
2554 | 0 | hash->u.pi.i = key->chapter_num; |
2555 | 0 | return 1; |
2556 | 0 | } |
2557 | | |
2558 | | static void * |
2559 | | fz_keep_html_key(fz_context *ctx, void *key_) |
2560 | 0 | { |
2561 | 0 | fz_html_key *key = (fz_html_key *)key_; |
2562 | 0 | return fz_keep_imp(ctx, key, &key->refs); |
2563 | 0 | } |
2564 | | |
2565 | | static void |
2566 | | fz_drop_html_key(fz_context *ctx, void *key_) |
2567 | 0 | { |
2568 | 0 | fz_html_key *key = (fz_html_key *)key_; |
2569 | 0 | if (fz_drop_imp(ctx, key, &key->refs)) |
2570 | 0 | { |
2571 | 0 | fz_free(ctx, key); |
2572 | 0 | } |
2573 | 0 | } |
2574 | | |
2575 | | static int |
2576 | | fz_cmp_html_key(fz_context *ctx, void *k0_, void *k1_) |
2577 | 0 | { |
2578 | 0 | fz_html_key *k0 = (fz_html_key *)k0_; |
2579 | 0 | fz_html_key *k1 = (fz_html_key *)k1_; |
2580 | 0 | return k0->doc == k1->doc && k0->chapter_num == k1->chapter_num; |
2581 | 0 | } |
2582 | | |
2583 | | static void |
2584 | | fz_format_html_key(fz_context *ctx, char *s, size_t n, void *key_) |
2585 | 0 | { |
2586 | 0 | fz_html_key *key = (fz_html_key *)key_; |
2587 | 0 | fz_snprintf(s, n, "(html doc=%p, ch=%d)", key->doc, key->chapter_num); |
2588 | 0 | } |
2589 | | |
/*
	Store type descriptor for parsed html trees. It bundles the key
	callbacks defined above and is the type argument passed to
	fz_store_item, fz_find_item and fz_filter_store in this file.
*/
static const fz_store_type fz_html_store_type =
{
	"fz_html", /* type name */
	fz_make_hash_html_key,
	fz_keep_html_key,
	fz_drop_html_key,
	fz_cmp_html_key,
	fz_format_html_key,
	NULL /* no callback supplied for the final slot */
};
2600 | | |
2601 | | fz_html *fz_store_html(fz_context *ctx, fz_html *html, void *doc, int chapter) |
2602 | 0 | { |
2603 | 0 | fz_html_key *key = NULL; |
2604 | 0 | fz_html *other_html; |
2605 | | |
2606 | | /* Stick the parsed html in the store */ |
2607 | 0 | fz_var(key); |
2608 | |
|
2609 | 0 | fz_try(ctx) |
2610 | 0 | { |
2611 | 0 | key = fz_malloc_struct(ctx, fz_html_key); |
2612 | 0 | key->refs = 1; |
2613 | 0 | key->doc = doc; |
2614 | 0 | key->chapter_num = chapter; |
2615 | 0 | other_html = fz_store_item(ctx, key, html, fz_html_size(ctx, html), &fz_html_store_type); |
2616 | 0 | if (other_html) |
2617 | 0 | { |
2618 | 0 | fz_drop_html(ctx, html); |
2619 | 0 | html = other_html; |
2620 | 0 | } |
2621 | 0 | } |
2622 | 0 | fz_always(ctx) |
2623 | 0 | fz_drop_html_key(ctx, key); |
2624 | 0 | fz_catch(ctx) |
2625 | 0 | { |
2626 | | /* Do nothing */ |
2627 | 0 | } |
2628 | |
|
2629 | 0 | return html; |
2630 | 0 | } |
2631 | | |
2632 | | fz_html *fz_find_html(fz_context *ctx, void *doc, int chapter) |
2633 | 0 | { |
2634 | 0 | fz_html_key key; |
2635 | |
|
2636 | 0 | key.refs = 1; |
2637 | 0 | key.doc = doc; |
2638 | 0 | key.chapter_num = chapter; |
2639 | 0 | return fz_find_item(ctx, &fz_drop_html_imp, &key, &fz_html_store_type); |
2640 | 0 | } |
2641 | | |
2642 | | static int |
2643 | | html_filter_store(fz_context *ctx, void *doc, void *key_) |
2644 | 0 | { |
2645 | 0 | fz_html_key *key = (fz_html_key *)key_; |
2646 | |
|
2647 | 0 | return (doc == key->doc); |
2648 | 0 | } |
2649 | | |
2650 | | void fz_purge_stored_html(fz_context *ctx, void *doc) |
2651 | 0 | { |
2652 | 0 | fz_filter_store(ctx, html_filter_store, doc, &fz_html_store_type); |
2653 | 0 | } |
2654 | | |
2655 | | static void |
2656 | | convert_to_boxes(fz_context *ctx, fz_story *story) |
2657 | 0 | { |
2658 | 0 | warning_save saved = { 0 }; |
2659 | |
|
2660 | 0 | if (story->dom == NULL) |
2661 | 0 | return; |
2662 | | |
2663 | 0 | fz_var(saved); |
2664 | |
|
2665 | 0 | fz_try(ctx) |
2666 | 0 | { |
2667 | 0 | redirect_warnings_to_buffer(ctx, story->warnings, &saved); |
2668 | 0 | xml_to_boxes(ctx, story->font_set, story->zip, ".", story->user_css, story->dom, &story->tree, NULL, 0, 0); |
2669 | 0 | } |
2670 | 0 | fz_always(ctx) |
2671 | 0 | { |
2672 | 0 | fz_drop_xml(ctx, story->dom); |
2673 | 0 | story->dom = NULL; |
2674 | 0 | restore_warnings(ctx, &saved); |
2675 | 0 | } |
2676 | 0 | fz_catch(ctx) |
2677 | 0 | fz_rethrow(ctx); |
2678 | 0 | } |
2679 | | |
2680 | | int fz_place_story(fz_context *ctx, fz_story *story, fz_rect where, fz_rect *filled) |
2681 | 0 | { |
2682 | 0 | return fz_place_story_flags(ctx, story, where, filled, 0); |
2683 | 0 | } |
2684 | | |
2685 | | int fz_place_story_flags(fz_context *ctx, fz_story *story, fz_rect where, fz_rect *filled, int flags) |
2686 | 0 | { |
2687 | 0 | float w, h; |
2688 | |
|
2689 | 0 | if (filled) |
2690 | 0 | *filled = fz_empty_rect; |
2691 | |
|
2692 | 0 | if (story == NULL || story->complete) |
2693 | 0 | return 0; |
2694 | | |
2695 | | /* Convert from XML to box model on the first attempt to place. |
2696 | | * The DOM is unusable from here on in. */ |
2697 | 0 | convert_to_boxes(ctx, story); |
2698 | |
|
2699 | 0 | w = where.x1 - where.x0; |
2700 | 0 | h = where.y1 - where.y0; |
2701 | | /* Confusingly, we call the layout using restart_draw, not restart_place, |
2702 | | * because we don't want to destroy the current values in restart_place |
2703 | | * in case we have to retry later. This means the values are left in |
2704 | | * the correct struct though! */ |
2705 | 0 | story->restart_draw.start = story->restart_place.start; |
2706 | 0 | story->restart_draw.start_flow = story->restart_place.start_flow; |
2707 | 0 | story->restart_draw.start_flags = story->restart_place.start_flags; |
2708 | 0 | story->restart_draw.end = NULL; |
2709 | 0 | story->restart_draw.end_flow = NULL; |
2710 | 0 | story->restart_draw.end_flags = 0; |
2711 | 0 | story->restart_draw.reason = FZ_HTML_RESTART_REASON_NONE; |
2712 | 0 | story->restart_draw.flags = flags; |
2713 | 0 | story->bbox = where; |
2714 | 0 | fz_restartable_layout_html(ctx, &story->tree, where.x0, where.y0, w, h, story->em, &story->restart_draw); |
2715 | 0 | story->restart_draw.start = story->restart_place.start; |
2716 | 0 | story->restart_draw.start_flow = story->restart_place.start_flow; |
2717 | 0 | story->restart_draw.start_flags = story->restart_place.start_flags; |
2718 | |
|
2719 | 0 | if (filled) |
2720 | 0 | { |
2721 | 0 | fz_html_box *b = story->tree.root; |
2722 | 0 | filled->x0 = b->s.layout.x - b->u.block.margin[L] - b->u.block.border[L] - b->u.block.padding[L]; |
2723 | 0 | filled->x1 = b->s.layout.w + b->u.block.margin[R] + b->u.block.border[R] + b->u.block.padding[R] + b->s.layout.x; |
2724 | 0 | filled->y0 = b->s.layout.y - b->u.block.margin[T] - b->u.block.border[T] - b->u.block.padding[T]; |
2725 | 0 | filled->y1 = b->s.layout.b + b->u.block.margin[B] + b->u.block.border[B] + b->u.block.padding[B]; |
2726 | 0 | } |
2727 | |
|
2728 | 0 | #ifndef NDEBUG |
2729 | 0 | if (fz_atoi(getenv("FZ_DEBUG_HTML"))) |
2730 | 0 | fz_debug_html(ctx, story->tree.root); |
2731 | 0 | #endif |
2732 | |
|
2733 | 0 | if (story->restart_draw.end == NULL) |
2734 | 0 | return FZ_HTML_RESTART_REASON_NONE; |
2735 | 0 | if (story->restart_draw.reason == FZ_HTML_RESTART_REASON_LINE_WIDTH) |
2736 | 0 | return FZ_HTML_RESTART_REASON_LINE_WIDTH; |
2737 | 0 | return FZ_HTML_RESTART_REASON_LINE_HEIGHT; |
2738 | 0 | } |
2739 | | |
2740 | | const char * |
2741 | | fz_story_warnings(fz_context *ctx, fz_story *story) |
2742 | 0 | { |
2743 | 0 | unsigned char *data; |
2744 | |
|
2745 | 0 | if (!story) |
2746 | 0 | return NULL; |
2747 | | |
2748 | 0 | convert_to_boxes(ctx, story); |
2749 | |
|
2750 | 0 | fz_terminate_buffer(ctx, story->warnings); |
2751 | |
|
2752 | 0 | if (fz_buffer_storage(ctx, story->warnings, &data) == 0) |
2753 | 0 | return NULL; |
2754 | | |
2755 | 0 | return (const char *)data; |
2756 | 0 | } |