Coverage Report

Created: 2025-01-28 06:17

/src/mupdf/source/html/html-imp.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2024 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#ifndef SOURCE_HTML_IMP_H
24
#define SOURCE_HTML_IMP_H
25
26
#include "mupdf/fitz.h"
27
#include "mupdf/html.h"
28
29
#include "../fitz/xml-imp.h"
30
31
typedef struct fz_html_font_face_s fz_html_font_face;
32
typedef struct fz_html_box_s fz_html_box;
33
typedef struct fz_html_flow_s fz_html_flow;
34
typedef struct fz_css_style_splay_s fz_css_style_splay;
35
36
typedef struct fz_css_s fz_css;
37
typedef struct fz_css_rule_s fz_css_rule;
38
typedef struct fz_css_match_s fz_css_match;
39
typedef struct fz_css_style_s fz_css_style;
40
41
typedef struct fz_css_selector_s fz_css_selector;
42
typedef struct fz_css_condition_s fz_css_condition;
43
typedef struct fz_css_property_s fz_css_property;
44
typedef struct fz_css_value_s fz_css_value;
45
typedef struct fz_css_number_s fz_css_number;
46
typedef struct fz_css_color_s fz_css_color;
47
48
struct fz_html_font_face_s /* One font face record; records chain into a list through 'next'. */
49
{
50
  char *family; /* cf. the 'family' argument of fz_add_html_font_face */
51
  int is_bold;
52
  int is_italic;
53
  int is_small_caps;
54
  fz_font *font;
55
  char *src; /* NOTE(review): presumably where the face came from (cf. the 'src' argument of fz_add_html_font_face) -- confirm */
56
  fz_html_font_face *next; /* next record in the list */
57
};
58
59
struct fz_html_font_set_s /* A fixed table of 12 font slots plus a list of custom faces. */
60
{
61
  fz_font *fonts[12]; /* Times, Helvetica, Courier in R,I,B,BI */
62
  fz_html_font_face *custom; /* head of a fz_html_font_face list (linked through 'next') */
63
};
64
65
554
#define UCS_MAX 0x10ffff
66
67
enum /* CSS_* codes, numbered upwards from UCS_MAX+1 (0x110000). */
68
{
69
  CSS_KEYWORD = UCS_MAX+1, /* NOTE(review): presumably placed above UCS_MAX so a code can never equal a character value -- confirm in the lexer */
70
  CSS_HASH,
71
  CSS_STRING,
72
  CSS_NUMBER,
73
  CSS_LENGTH,
74
  CSS_PERCENT,
75
  CSS_URI,
76
};
77
78
struct fz_css_s /* A style sheet: a pool plus a list of rules. */
79
{
80
  fz_pool *pool; /* NOTE(review): presumably the allocator the rules live in -- confirm in fz_new_css */
81
  fz_css_rule *rule; /* head of the rule list (fz_css_rule is linked through 'next') */
82
};
83
84
struct fz_css_rule_s /* One rule: selector(s) plus declaration list; rules chain through 'next'. */
85
{
86
  fz_css_selector *selector; /* selectors chain through their own 'next' field */
87
  fz_css_property *declaration; /* head of the property list (linked through 'next') */
88
  fz_css_rule *next;
89
  int loaded; /* NOTE(review): meaning is not visible in this header -- check the users */
90
};
91
92
struct fz_css_selector_s /* Selector node: has left/right links as well as a 'next' link. */
93
{
94
  char *name;
95
  int combine; /* NOTE(review): presumably the combinator joining 'left' and 'right' -- confirm in the parser */
96
  fz_css_condition *cond; /* head of the condition list (linked through 'next') */
97
  fz_css_selector *left;
98
  fz_css_selector *right;
99
  fz_css_selector *next;
100
};
101
102
struct fz_css_condition_s /* One condition attached to a selector; conditions chain through 'next'. */
103
{
104
  int type; /* NOTE(review): the set of valid type codes is not visible in this header */
105
  char *key;
106
  char *val;
107
  fz_css_condition *next;
108
};
109
110
struct fz_css_property_s /* One declared property; properties chain through 'next'. */
111
{
112
  int name; /* NOTE(review): presumably one of the PRO_* values (cf. fz_css_property_name(int name)) -- confirm */
113
  fz_css_value *value; /* head of the value list (fz_css_value is linked through 'next') */
114
  short spec; /* same type as the entries of fz_css_match.spec[] */
115
  short important;
116
  fz_css_property *next;
117
};
118
119
struct fz_css_value_s /* One value node; siblings chain through 'next', arguments hang off 'args'. */
120
{
121
  int type; /* NOTE(review): presumably a CSS_* code or a character value -- confirm in the parser */
122
  char *data;
123
  fz_css_value *args; /* function arguments */
124
  fz_css_value *next;
125
};
126
127
enum /* Property identifiers. Those listed before NUM_PROPERTIES are valid indices into fz_css_match.spec[] and .value[]. */
128
{
129
  PRO_BACKGROUND_COLOR,
130
  PRO_BORDER_BOTTOM_COLOR,
131
  PRO_BORDER_BOTTOM_STYLE,
132
  PRO_BORDER_BOTTOM_WIDTH,
133
  PRO_BORDER_LEFT_COLOR,
134
  PRO_BORDER_LEFT_STYLE,
135
  PRO_BORDER_LEFT_WIDTH,
136
  PRO_BORDER_RIGHT_COLOR,
137
  PRO_BORDER_RIGHT_STYLE,
138
  PRO_BORDER_RIGHT_WIDTH,
139
  PRO_BORDER_TOP_COLOR,
140
  PRO_BORDER_TOP_STYLE,
141
  PRO_BORDER_TOP_WIDTH,
142
  PRO_BORDER_SPACING,
143
  PRO_COLOR,
144
  PRO_DIRECTION,
145
  PRO_DISPLAY,
146
  PRO_FONT,
147
  PRO_FONT_FAMILY,
148
  PRO_FONT_SIZE,
149
  PRO_FONT_STYLE,
150
  PRO_FONT_VARIANT,
151
  PRO_FONT_WEIGHT,
152
  PRO_HEIGHT,
153
  PRO_LEADING,
154
  PRO_LETTER_SPACING,
155
  PRO_LINE_HEIGHT,
156
  PRO_LIST_STYLE_IMAGE,
157
  PRO_LIST_STYLE_POSITION,
158
  PRO_LIST_STYLE_TYPE,
159
  PRO_MARGIN_BOTTOM,
160
  PRO_MARGIN_LEFT,
161
  PRO_MARGIN_RIGHT,
162
  PRO_MARGIN_TOP,
163
  PRO_ORPHANS,
164
  PRO_OVERFLOW_WRAP,
165
  PRO_PADDING_BOTTOM,
166
  PRO_PADDING_LEFT,
167
  PRO_PADDING_RIGHT,
168
  PRO_PADDING_TOP,
169
  PRO_PAGE_BREAK_AFTER,
170
  PRO_PAGE_BREAK_BEFORE,
171
  PRO_QUOTES,
172
  PRO_SRC,
173
  PRO_TEXT_ALIGN,
174
  PRO_TEXT_DECORATION,
175
  PRO_TEXT_FILL_COLOR,
176
  PRO_TEXT_INDENT,
177
  PRO_TEXT_TRANSFORM,
178
  PRO_TEXT_STROKE_WIDTH,
179
  PRO_TEXT_STROKE_COLOR,
180
  PRO_VERTICAL_ALIGN,
181
  PRO_VISIBILITY,
182
  PRO_WHITE_SPACE,
183
  PRO_WIDOWS,
184
  PRO_WIDTH,
185
  PRO_WORD_SPACING,
186
187
  /* Number of real properties. */
188
  NUM_PROPERTIES, /* also the array length of fz_css_match.spec[] and fz_css_match.value[] */
189
190
  /* Short-hand properties (always expanded when applied, never used as is): */
191
  PRO_BORDER, /* from here on the values are >= NUM_PROPERTIES, so they are out of range for the fz_css_match arrays */
192
  PRO_BORDER_BOTTOM,
193
  PRO_BORDER_COLOR,
194
  PRO_BORDER_LEFT,
195
  PRO_BORDER_RIGHT,
196
  PRO_BORDER_STYLE,
197
  PRO_BORDER_TOP,
198
  PRO_BORDER_WIDTH,
199
  PRO_LIST_STYLE,
200
  PRO_MARGIN,
201
  PRO_PADDING,
202
};
203
204
struct fz_css_match_s /* Per-property match state; both arrays have one slot per real property (index < NUM_PROPERTIES). */
205
{
206
  fz_css_match *up; /* cf. the 'up' argument of fz_match_css; NOTE(review): presumably the enclosing node's match -- confirm */
207
  short spec[NUM_PROPERTIES]; /* same type as fz_css_property.spec */
208
  fz_css_value *value[NUM_PROPERTIES];
209
};
210
211
enum { DIS_NONE, DIS_BLOCK, DIS_INLINE, DIS_LIST_ITEM, DIS_INLINE_BLOCK, DIS_TABLE, DIS_TABLE_GROUP, DIS_TABLE_ROW, DIS_TABLE_CELL }; /* NOTE(review): presumably the values returned by fz_get_css_match_display -- confirm */
212
enum { POS_STATIC, POS_RELATIVE, POS_ABSOLUTE, POS_FIXED }; /* NOTE(review): no matching field is visible in fz_css_style in this header */
213
enum { TA_LEFT, TA_RIGHT, TA_CENTER, TA_JUSTIFY }; /* 4 values (0..3): fit the 2-bit fz_css_style.text_align field */
214
enum { VA_BASELINE, VA_SUB, VA_SUPER, VA_TOP, VA_BOTTOM, VA_TEXT_TOP, VA_TEXT_BOTTOM }; /* 7 values (0..6): fit the 3-bit fz_css_style.vertical_align field */
215
enum { BS_NONE, BS_SOLID }; /* 2 values: fit the 1-bit fz_css_style.border_style_N fields */
216
enum { V_VISIBLE, V_HIDDEN, V_COLLAPSE }; /* 3 values (0..2): fit the 2-bit fz_css_style.visibility field */
217
enum { PB_AUTO, PB_ALWAYS, PB_AVOID, PB_LEFT, PB_RIGHT }; /* 5 values (0..4): fit the 3-bit page_break_before/page_break_after fields */
218
enum { TD_NONE, TD_UNDERLINE, TD_LINE_THROUGH }; /* 3 values (0..2): fit the 2-bit fz_css_style.text_decoration field */
219
220
enum { /* White-space modes: three single-bit flags, then named modes built by OR-ing them (maximum value 7, i.e. 3 bits). */
221
  WS_COLLAPSE = 1, /* flag bit 0 */
222
  WS_ALLOW_BREAK_SPACE = 2, /* flag bit 1 */
223
  WS_FORCE_BREAK_NEWLINE = 4, /* flag bit 2 */
224
  WS_NORMAL = WS_COLLAPSE | WS_ALLOW_BREAK_SPACE, /* == 3 */
225
  WS_PRE = WS_FORCE_BREAK_NEWLINE, /* == 4 */
226
  WS_NOWRAP = WS_COLLAPSE, /* == 1 */
227
  WS_PRE_WRAP = WS_ALLOW_BREAK_SPACE | WS_FORCE_BREAK_NEWLINE, /* == 6 */
228
  WS_PRE_LINE = WS_COLLAPSE | WS_ALLOW_BREAK_SPACE | WS_FORCE_BREAK_NEWLINE /* == 7 */
229
};
230
231
enum { /* List marker styles: 16 values (0..15), which exactly fill the 4-bit fz_css_style.list_style_type field. */
232
  LST_NONE,
233
  LST_DISC, LST_CIRCLE, LST_SQUARE,
234
  LST_DECIMAL, LST_DECIMAL_ZERO,
235
  LST_LC_ROMAN, LST_UC_ROMAN,
236
  LST_LC_GREEK, LST_UC_GREEK,
237
  LST_LC_LATIN, LST_UC_LATIN,
238
  LST_LC_ALPHA, LST_UC_ALPHA,
239
  LST_ARMENIAN, LST_GEORGIAN, /* LST_GEORGIAN == 15: adding another value would need a wider bit-field */
240
};
241
242
enum { /* overflow-wrap values: 0 or 1, fitting the 1-bit overflow_wrap fields of fz_css_style and fz_html_flow. */
243
  OVERFLOW_WRAP_NORMAL = 0,
244
  OVERFLOW_WRAP_BREAK_WORD = 1
245
  /* We do not support 'anywhere'. */
246
};
247
248
enum { N_NUMBER='u', N_LENGTH='p', N_SCALE='m', N_PERCENT='%', N_AUTO='a', N_UNDEFINED='x' }; /* Character-valued tags; NOTE(review): presumably the values stored in fz_css_number.unit -- confirm */
249
250
struct fz_css_number_s /* A numeric value paired with a unit tag. */
251
{
252
  float value;
253
  int unit; /* NOTE(review): presumably one of the N_* constants -- confirm */
254
};
255
256
struct fz_css_color_s /* Four unsigned char components. */
257
{
258
  unsigned char r, g, b, a; /* one byte per component; NOTE(review): presumably red, green, blue, alpha -- confirm */
259
};
260
261
struct fz_css_style_s /* Style record: numeric lengths, a run of packed bit-fields, colors and a font pointer. */
262
{
263
  fz_css_number font_size;
264
  fz_css_number width, height;
265
  fz_css_number margin[4];
266
  fz_css_number padding[4];
267
  fz_css_number border_width[4];
268
  fz_css_number border_spacing;
269
  fz_css_number text_indent;
270
  fz_css_number text_stroke_width;
271
  unsigned int visibility : 2; /* V_* values (0..2) */
272
  unsigned int white_space : 3; /* WS_* values (bit combinations up to 7) */
273
  unsigned int text_align : 2; /* TA_* values (0..3) */
274
  unsigned int vertical_align : 3; /* VA_* values (0..6) */
275
  unsigned int list_style_type : 4; /* LST_* values (0..15) */
276
  unsigned int page_break_before : 3; /* PB_* values (0..4) */
277
  unsigned int page_break_after : 3; /* PB_* values (0..4) */
278
  unsigned int border_style_0 : 1; /* BS_* values; NOTE(review): presumably _0.._3 follow the same side order as border_width[4] -- confirm */
279
  unsigned int border_style_1 : 1;
280
  unsigned int border_style_2 : 1;
281
  unsigned int border_style_3 : 1;
282
  unsigned int small_caps : 1;
283
  unsigned int text_decoration: 2; /* TD_* values (0..2) */
284
  unsigned int overflow_wrap : 1; /* OVERFLOW_WRAP_* values (0..1) */
285
  /* Ensure the extra bits in the bitfield are copied
286
   * on structure copies. */
287
  unsigned int blank : 3; /* NOTE(review): the named bit-fields above total 28 bits, 31 with 'blank'; if unsigned int is 32 bits, one bit stays unnamed -- confirm this is intended */
288
  fz_css_number line_height;
289
  fz_css_number leading;
290
  fz_css_color background_color;
291
  fz_css_color border_color[4];
292
  fz_css_color color;
293
  fz_css_color text_fill_color;
294
  fz_css_color text_stroke_color;
295
  fz_font *font;
296
};
297
298
struct fz_css_style_splay_s { /* Tree node embedding one fz_css_style by value; this is the tree type taken by fz_css_enlist. */
299
  fz_css_style style;
300
  fz_css_style_splay *lt; /* NOTE(review): presumably the "less than" child -- confirm in fz_css_enlist */
301
  fz_css_style_splay *gt; /* NOTE(review): presumably the "greater than" child -- confirm */
302
  fz_css_style_splay *up; /* NOTE(review): presumably the parent node -- confirm */
303
};
304
305
enum /* Box kinds: 6 values (0..5), fitting the 3-bit fz_html_box.type field. */
306
{
307
  BOX_BLOCK,    /* block-level: contains block, break, flow, and table boxes */
308
  BOX_FLOW,   /* block-level: contains only inline boxes */
309
  BOX_INLINE,   /* inline-level: contains only inline boxes */
310
  BOX_TABLE,    /* table: contains table-row */
311
  BOX_TABLE_ROW,    /* table-row: contains table-cell */
312
  BOX_TABLE_CELL,   /* table-cell: contains block */
313
};
314
315
typedef struct /* Common base embedded as the first member of both fz_html and fz_story. */
316
{
317
  fz_storable storable; /* first member; NOTE(review): presumably so the tree can be handled as an fz_storable -- confirm */
318
  fz_pool *pool; /* pool allocator for this html tree */
319
  fz_html_box *root; /* top box of the tree; boxes link through up/down/next */
320
} fz_html_tree;
321
322
struct fz_html_s /* An HTML document tree plus page and layout parameters. */
323
{
324
  /* fz_html is derived from fz_html_tree, so must start with that. */
325
  /* Arguably 'tree' should be called 'super'. */
326
  fz_html_tree tree;
327
328
  float page_w, page_h;
329
  float layout_w, layout_h, layout_em; /* cf. the w, h, em arguments of fz_layout_html; NOTE(review): presumably the values last laid out for -- confirm */
330
  float page_margin[4];
331
  char *title;
332
};
333
334
typedef enum /* Type of the fz_html_restarter.reason field. */
335
{
336
  FZ_HTML_RESTART_REASON_NONE = 0,
337
  FZ_HTML_RESTART_REASON_LINE_HEIGHT = 1,
338
  FZ_HTML_RESTART_REASON_LINE_WIDTH = 2
339
} fz_html_restart_reason;
340
341
enum /* NOTE(review): presumably bit values for fz_html_restarter.flags -- confirm */
342
{
343
  FZ_HTML_RESTARTER_FLAGS_NO_OVERFLOW = 1
344
};
345
346
typedef struct { /* Restart state passed to fz_restartable_layout_html: where to begin, and on exit where to resume. */
347
  /* start will be filled in on entry with the first node to start
348
   * operation on. NULL means start 'immediately'. As we traverse
349
   * the tree, once we reach the node to start on, we set this to
350
   * NULL, hence if 'start != NULL' then we are still skipping to
351
   * find the starting node. */
352
  fz_html_box *start;
353
354
  /* If start is a BOX_FLOW, then start_flow will be the flow entry
355
   * at which we should start. */
356
  fz_html_flow *start_flow;
357
358
359
  /* end should be NULL on entry. On exit, if it's NULL, then we
360
   * finished. Otherwise, this is where we should restart the
361
   * process the next time. */
362
  fz_html_box *end;
363
364
  /* If end is a BOX_FLOW, then end_flow will be the flow entry at which
365
   * we should restart next time. */
366
  fz_html_flow *end_flow;
367
368
369
  /* Workspace used on the traversal of the tree to store a good place
370
   * to restart. Typically this will be set to an enclosing box with
371
   * a border, so that if we then fail to put any content into the box
372
   * we'll elide the entire box/border, not output an empty one. */
373
  fz_html_box *potential;
374
375
  fz_html_restart_reason reason; /* one of the FZ_HTML_RESTART_REASON_* values */
376
377
  int flags; /* NOTE(review): presumably FZ_HTML_RESTARTER_FLAGS_* bits -- confirm */
378
} fz_html_restarter;
379
380
struct fz_story /* A story: an HTML tree plus its DOM, fonts, restart positions and layout bookkeeping. */
381
{
382
  /* fz_story is derived from fz_html_tree, so must start with */
383
  /* that. Arguably 'tree' should be called 'super'. */
384
  fz_html_tree tree;
385
386
  /* The user_css (or NULL) */
387
  char *user_css;
388
389
  /* The HTML story as XML nodes with a DOM */
390
  fz_xml *dom;
391
392
  /* The fontset for the content. */
393
  fz_html_font_set *font_set;
394
395
  /* restart_place holds the start position for the next place.
396
   * This is updated by draw. */
397
  fz_html_restarter restart_place;
398
399
  /* restart_draw holds the start position for the next draw.
400
   * This is updated by place. */
401
  fz_html_restarter restart_draw;
402
403
  /* complete is set true when all the story has been placed and
404
   * drawn. */
405
  int complete;
406
407
  /* The last bbox we laid out for. Used for making a clipping
408
   * rectangle. */
409
  fz_rect bbox;
410
411
  /* The default 'em' size. */
412
  float em;
413
414
  /* Collected parsing warnings. */
415
  fz_buffer *warnings;
416
417
  /* Rectangle layout count. */
418
  int rect_count;
419
420
  /* Archive from which to load any resources. */
421
  fz_archive *zip; /* declared as a generic fz_archive, despite the field name */
422
};
423
424
struct fz_html_box_s /* One box in the HTML tree: packed flags, tree links, and two unions selected by build/layout phase and by box type. */
425
{
426
  unsigned int type : 3; /* one of the BOX_* values (0..5) */
427
  unsigned int is_first_flow : 1; /* for text-indent */
428
  unsigned int markup_dir : 2;
429
  unsigned int heading : 3;
430
  unsigned int list_item : 21; /* the five bit-fields total 30 bits */
431
432
  fz_html_box *up, *down, *next; /* NOTE(review): presumably parent, first child and next sibling -- confirm */
433
434
  const char *tag, *id, *href;
435
  const fz_css_style *style; /* const: same type as returned by fz_css_enlist; NOTE(review): presumably shared between boxes -- confirm */
436
437
  union { /* 'build' and 'layout' share storage, so only one of them is meaningful at a time */
438
    /* Only needed during build stage */
439
    struct {
440
      fz_html_box *last_child;
441
      fz_html_flow **flow_tail;
442
    } build;
443
444
    /* Only needed during layout */
445
    struct {
446
      float x, y, w, b; /* content */
447
      float em, baseline;
448
    } layout;
449
  } s;
450
451
  union { /* 'flow' and 'block' share storage; which one applies depends on 'type' */
452
    /* Only BOX_FLOW use the following */
453
    struct {
454
      fz_html_flow *head; /* first flow node; flow nodes chain through fz_html_flow.next */
455
    } flow;
456
457
    /* Only BOX_{BLOCK,TABLE,TABLE_ROW,TABLE_CELL} use the following */
458
    struct {
459
      float margin[4]; // TODO: is margin needed post layout?
460
      float border[4];
461
      float padding[4];
462
    } block;
463
  } u;
464
};
465
466
static inline int /* Returns 1 when box->type is BOX_BLOCK, BOX_TABLE, BOX_TABLE_ROW or BOX_TABLE_CELL, otherwise 0. */
467
fz_html_box_has_boxes(fz_html_box *box) /* box is dereferenced unconditionally, so it must not be NULL */
468
3.61k
{
469
3.61k
  return (box->type == BOX_BLOCK || box->type == BOX_TABLE || box->type == BOX_TABLE_ROW || box->type == BOX_TABLE_CELL); /* the same four types that use the u.block member of fz_html_box */
470
3.61k
}
Unexecuted instantiation: epub-doc.c:fz_html_box_has_boxes
Unexecuted instantiation: html-doc.c:fz_html_box_has_boxes
Unexecuted instantiation: html-font.c:fz_html_box_has_boxes
html-layout.c:fz_html_box_has_boxes
Line
Count
Source
468
3.61k
{
469
3.61k
  return (box->type == BOX_BLOCK || box->type == BOX_TABLE || box->type == BOX_TABLE_ROW || box->type == BOX_TABLE_CELL);
470
3.61k
}
Unexecuted instantiation: html-outline.c:fz_html_box_has_boxes
Unexecuted instantiation: html-parse.c:fz_html_box_has_boxes
Unexecuted instantiation: mobi.c:fz_html_box_has_boxes
Unexecuted instantiation: office.c:fz_html_box_has_boxes
Unexecuted instantiation: css-apply.c:fz_html_box_has_boxes
Unexecuted instantiation: css-parse.c:fz_html_box_has_boxes
471
472
enum /* Flow node kinds: 7 values (0..6), fitting the 3-bit fz_html_flow.type field. */
473
{
474
  FLOW_WORD = 0,
475
  FLOW_SPACE = 1,
476
  FLOW_BREAK = 2,
477
  FLOW_IMAGE = 3,
478
  FLOW_SBREAK = 4,
479
  FLOW_SHYPHEN = 5,
480
  FLOW_ANCHOR = 6
481
};
482
483
struct fz_html_flow_s /* One node of inline content; nodes chain through 'next'. */
484
{
485
  /* What type of node (one of the FLOW_* values) */
486
  unsigned int type : 3;
487
488
  /* Whether this should expand during justification */
489
  unsigned int expand : 1;
490
491
  /* Whether this node is currently taken as a line break */
492
  unsigned int breaks_line : 1;
493
494
  /* Whether this word node can be split or consists of a single glyph cluster */
495
  unsigned int atomic : 1;
496
497
  /* Whether lines may be broken before this word for overflow-wrap: break-word */
498
  unsigned int overflow_wrap : 1;
499
500
  /* Direction setting for text - UAX#9 says 125 is the max (7 bits hold up to 127) */
501
  unsigned int bidi_level : 7;
502
503
  /* The script detected by the bidi code. */
504
  unsigned int script : 8;
505
506
  /* Whether the markup specifies a given language. */
507
  unsigned short markup_lang;
508
509
  float x, y, w, h;
510
  fz_html_box *box; /* for style and em */
511
  fz_html_flow *next;
512
  union { /* NOTE(review): which member is valid presumably depends on 'type' (image for FLOW_IMAGE) -- confirm */
513
    char text[1]; /* NOTE(review): declared with one element; presumably the node is over-allocated so the text can run past it -- confirm against the allocator */
514
    fz_image *image;
515
  } content;
516
};
517
518
519
fz_css *fz_new_css(fz_context *ctx);
520
void fz_parse_css(fz_context *ctx, fz_css *css, const char *source, const char *file);
521
fz_css_property *fz_parse_css_properties(fz_context *ctx, fz_pool *pool, const char *source);
522
void fz_drop_css(fz_context *ctx, fz_css *css);
523
void fz_debug_css(fz_context *ctx, fz_css *css);
524
const char *fz_css_property_name(int name);
525
526
void fz_match_css(fz_context *ctx, fz_css_match *match, fz_css_match *up, fz_css *css, fz_xml *node);
527
void fz_match_css_at_page(fz_context *ctx, fz_css_match *match, fz_css *css);
528
529
int fz_get_css_match_display(fz_css_match *node);
530
void fz_default_css_style(fz_context *ctx, fz_css_style *style);
531
void fz_apply_css_style(fz_context *ctx, fz_html_font_set *set, fz_css_style *style, fz_css_match *match);
532
533
/*
534
  Lookup style in the splay tree, returning a pointer
535
  to the found instance if there is one, creating and
536
  inserting (and moving to root) one if there is not.
537
*/
538
const fz_css_style *fz_css_enlist(fz_context *ctx, const fz_css_style *style, fz_css_style_splay **tree, fz_pool *pool);
539
540
float fz_from_css_number(fz_css_number number, float em, float percent_value, float auto_value);
541
float fz_from_css_number_scale(fz_css_number number, float scale);
542
int fz_css_number_defined(fz_css_number number);
543
544
fz_html_font_set *fz_new_html_font_set(fz_context *ctx);
545
void fz_add_html_font_face(fz_context *ctx, fz_html_font_set *set,
546
  const char *family, int is_bold, int is_italic, int is_small_caps, const char *src, fz_font *font);
547
fz_font *fz_load_html_font(fz_context *ctx, fz_html_font_set *set, const char *family, int is_bold, int is_italic, int is_small_caps);
548
void fz_drop_html_font_set(fz_context *ctx, fz_html_font_set *htx);
549
550
void fz_add_css_font_faces(fz_context *ctx, fz_html_font_set *set, fz_archive *dir, const char *base_uri, fz_css *css);
551
552
void fz_layout_html(fz_context *ctx, fz_html *html, float w, float h, float em);
553
void fz_draw_html(fz_context *ctx, fz_device *dev, fz_matrix ctm, fz_html *html, int page);
554
fz_outline *fz_load_html_outline(fz_context *ctx, fz_html *node);
555
556
float fz_find_html_target(fz_context *ctx, fz_html *html, const char *id);
557
fz_link *fz_load_html_links(fz_context *ctx, fz_html *html, int page, const char *base_uri);
558
fz_html *fz_keep_html(fz_context *ctx, fz_html *html);
559
void fz_drop_html(fz_context *ctx, fz_html *html);
560
fz_bookmark fz_make_html_bookmark(fz_context *ctx, fz_html *html, int page);
561
int fz_lookup_html_bookmark(fz_context *ctx, fz_html *html, fz_bookmark mark);
562
void fz_debug_html(fz_context *ctx, fz_html_box *box);
563
564
fz_html *fz_store_html(fz_context *ctx, fz_html *html, void *doc, int chapter);
565
fz_html *fz_find_html(fz_context *ctx, void *doc, int chapter);
566
void fz_purge_stored_html(fz_context *ctx, void *doc);
567
568
void fz_restartable_layout_html(fz_context *ctx, fz_html_tree *tree, float start_x, float start_y, float page_w, float page_h, float em, fz_html_restarter *restart);
569
570
fz_html_flow *fz_html_split_flow(fz_context *ctx, fz_pool *pool, fz_html_flow *flow, size_t offset);
571
572
fz_archive *fz_extract_html_from_mobi(fz_context *ctx, fz_buffer *mobi);
573
574
fz_structure fz_html_tag_to_structure(const char *tag);
575
576
fz_html *fz_parse_html(fz_context *ctx,
577
  fz_html_font_set *set, fz_archive *dir, const char *base_uri, fz_buffer *buf, const char *user_css,
578
  int try_xml, int try_html5, int patch_mobi);
579
580
fz_buffer *fz_txt_buffer_to_html(fz_context *ctx, fz_buffer *in);
581
582
583
#endif