/src/mupdf/source/pdf/pdf-font.c
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (C) 2004-2022 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | #include "mupdf/pdf.h" |
25 | | |
26 | | #include <assert.h> |
27 | | |
28 | | #include <ft2build.h> |
29 | | #include FT_FREETYPE_H |
30 | | #include FT_ADVANCES_H |
31 | | #ifdef FT_FONT_FORMATS_H |
32 | | #include FT_FONT_FORMATS_H |
33 | | #else |
34 | | #include FT_XFREE86_H |
35 | | #endif |
36 | | #include FT_TRUETYPE_TABLES_H |
37 | | |
38 | | #ifndef FT_SFNT_HEAD |
39 | | #define FT_SFNT_HEAD ft_sfnt_head |
40 | | #endif |
41 | | |
42 | | void |
43 | | pdf_load_encoding(const char **estrings, const char *encoding) |
44 | 6.60k | { |
45 | 6.60k | const char * const *bstrings = NULL; |
46 | 6.60k | int i; |
47 | | |
48 | 6.60k | if (!strcmp(encoding, "StandardEncoding")) |
49 | 3.02k | bstrings = fz_glyph_name_from_adobe_standard; |
50 | 6.60k | if (!strcmp(encoding, "MacRomanEncoding")) |
51 | 618 | bstrings = fz_glyph_name_from_mac_roman; |
52 | 6.60k | if (!strcmp(encoding, "MacExpertEncoding")) |
53 | 0 | bstrings = fz_glyph_name_from_mac_expert; |
54 | 6.60k | if (!strcmp(encoding, "WinAnsiEncoding")) |
55 | 2.69k | bstrings = fz_glyph_name_from_win_ansi; |
56 | | |
57 | 6.60k | if (bstrings) |
58 | 1.62M | for (i = 0; i < 256; i++) |
59 | 1.62M | estrings[i] = bstrings[i]; |
60 | 6.60k | } |
61 | | |
/* Forward declaration: used by the simple font loader before its definition appears. */
static void pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict,
	const char *collection, const char *basefont, int iscidfont);
64 | | |
/*
 * Alias table for the fourteen standard font names.
 *
 * Each row is a NULL-terminated list: element 0 is the canonical name and
 * the remaining elements are alternative spellings that
 * pdf_clean_font_name() maps back to element 0.
 *
 * The table is never modified, so both the strings and the pointer array
 * are const.
 */
static const char * const base_font_names[][10] =
{
	{ "Courier", "CourierNew", "CourierNewPSMT", NULL },
	{ "Courier-Bold", "CourierNew,Bold", "Courier,Bold",
		"CourierNewPS-BoldMT", "CourierNew-Bold", NULL },
	{ "Courier-Oblique", "CourierNew,Italic", "Courier,Italic",
		"CourierNewPS-ItalicMT", "CourierNew-Italic", NULL },
	{ "Courier-BoldOblique", "CourierNew,BoldItalic", "Courier,BoldItalic",
		"CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", NULL },
	{ "Helvetica", "ArialMT", "Arial", NULL },
	{ "Helvetica-Bold", "Arial-BoldMT", "Arial,Bold", "Arial-Bold",
		"Helvetica,Bold", NULL },
	{ "Helvetica-Oblique", "Arial-ItalicMT", "Arial,Italic", "Arial-Italic",
		"Helvetica,Italic", "Helvetica-Italic", NULL },
	{ "Helvetica-BoldOblique", "Arial-BoldItalicMT",
		"Arial,BoldItalic", "Arial-BoldItalic",
		"Helvetica,BoldItalic", "Helvetica-BoldItalic", NULL },
	{ "Times-Roman", "TimesNewRomanPSMT", "TimesNewRoman",
		"TimesNewRomanPS", NULL },
	{ "Times-Bold", "TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold",
		"TimesNewRomanPS-Bold", "TimesNewRoman-Bold", NULL },
	{ "Times-Italic", "TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic",
		"TimesNewRomanPS-Italic", "TimesNewRoman-Italic", NULL },
	{ "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT",
		"TimesNewRoman,BoldItalic", "TimesNewRomanPS-BoldItalic",
		"TimesNewRoman-BoldItalic", NULL },
	{ "Symbol", "Symbol,Italic", "Symbol,Bold", "Symbol,BoldItalic",
		"SymbolMT", "SymbolMT,Italic", "SymbolMT,Bold", "SymbolMT,BoldItalic", NULL },
	{ "ZapfDingbats", NULL }
};
95 | | |
96 | | const unsigned char * |
97 | | pdf_lookup_substitute_font(fz_context *ctx, int mono, int serif, int bold, int italic, int *len) |
98 | 3.24k | { |
99 | 3.24k | if (mono) { |
100 | 88 | if (bold) { |
101 | 12 | if (italic) return fz_lookup_base14_font(ctx, "Courier-BoldOblique", len); |
102 | 12 | else return fz_lookup_base14_font(ctx, "Courier-Bold", len); |
103 | 76 | } else { |
104 | 76 | if (italic) return fz_lookup_base14_font(ctx, "Courier-Oblique", len); |
105 | 73 | else return fz_lookup_base14_font(ctx, "Courier", len); |
106 | 76 | } |
107 | 3.16k | } else if (serif) { |
108 | 360 | if (bold) { |
109 | 105 | if (italic) return fz_lookup_base14_font(ctx, "Times-BoldItalic", len); |
110 | 85 | else return fz_lookup_base14_font(ctx, "Times-Bold", len); |
111 | 255 | } else { |
112 | 255 | if (italic) return fz_lookup_base14_font(ctx, "Times-Italic", len); |
113 | 200 | else return fz_lookup_base14_font(ctx, "Times-Roman", len); |
114 | 255 | } |
115 | 2.80k | } else { |
116 | 2.80k | if (bold) { |
117 | 393 | if (italic) return fz_lookup_base14_font(ctx, "Helvetica-BoldOblique", len); |
118 | 388 | else return fz_lookup_base14_font(ctx, "Helvetica-Bold", len); |
119 | 2.40k | } else { |
120 | 2.40k | if (italic) return fz_lookup_base14_font(ctx, "Helvetica-Oblique", len); |
121 | 2.34k | else return fz_lookup_base14_font(ctx, "Helvetica", len); |
122 | 2.40k | } |
123 | 2.80k | } |
124 | 3.24k | } |
125 | | |
/*
 * Return 1 if the font name matches one of the name patterns this file
 * treats as a DynaLab font, 0 otherwise.
 *
 * A name matches when it contains "HuaTian" or "MingLi" anywhere, or when
 * "DF" / "DLC" appears at the very start or directly after a '+'
 * (presumably a subset-tag separator; confirm against callers).
 *
 * name must be a non-NULL, NUL-terminated string; it is only read.
 */
static int is_dynalab(const char *name)
{
	if (strstr(name, "HuaTian"))
		return 1;
	if (strstr(name, "MingLi"))
		return 1;
	/* Prefix test: strncmp looks at the first bytes only instead of scanning the whole name. */
	if (!strncmp(name, "DF", 2) || strstr(name, "+DF"))
		return 1;
	if (!strncmp(name, "DLC", 3) || strstr(name, "+DLC"))
		return 1;
	return 0;
}
138 | | |
/*
 * Compare two strings for equality while skipping every ' ' character in
 * either of them.
 *
 * Returns 0 when the strings are equal after removing spaces, 1 otherwise.
 * Unlike strcmp() the result carries no ordering information.
 */
static int strcmp_ignore_space(const char *a, const char *b)
{
	for (;;)
	{
		while (*a == ' ')
			a++;
		while (*b == ' ')
			b++;
		if (*a != *b)
			return 1;
		/* The characters are equal here, so reaching the terminator
		 * means both strings ended together. */
		if (*a == 0)
			return 0;
		a++;
		b++;
	}
}
157 | | |
158 | | const char *pdf_clean_font_name(const char *fontname) |
159 | 7.27k | { |
160 | 7.27k | int i, k; |
161 | 90.3k | for (i = 0; i < (int)nelem(base_font_names); i++) |
162 | 477k | for (k = 0; base_font_names[i][k]; k++) |
163 | 393k | if (!strcmp_ignore_space(base_font_names[i][k], fontname)) |
164 | 2.49k | return base_font_names[i][0]; |
165 | 4.78k | return fontname; |
166 | 7.27k | } |
167 | | |
168 | | /* |
169 | | * FreeType and Rendering glue |
170 | | */ |
171 | | |
/* Font format categories returned by ft_kind(); CFF and CID Type 1 faces are reported as TYPE1. */
enum { UNKNOWN, TYPE1, TRUETYPE };
173 | | |
174 | | static int ft_kind(fz_context *ctx, FT_Face face) |
175 | 18.2k | { |
176 | 18.2k | const char *kind; |
177 | 18.2k | fz_ft_lock(ctx); |
178 | 18.2k | #ifdef FT_FONT_FORMATS_H |
179 | 18.2k | kind = FT_Get_Font_Format(face); |
180 | | #else |
181 | | kind = FT_Get_X11_Font_Format(face); |
182 | | #endif |
183 | 18.2k | fz_ft_unlock(ctx); |
184 | 18.2k | if (!strcmp(kind, "TrueType")) return TRUETYPE; |
185 | 13.5k | if (!strcmp(kind, "Type 1")) return TYPE1; |
186 | 12.1k | if (!strcmp(kind, "CFF")) return TYPE1; |
187 | 0 | if (!strcmp(kind, "CID Type 1")) return TYPE1; |
188 | 0 | return UNKNOWN; |
189 | 0 | } |
190 | | |
191 | | static int ft_cid_to_gid(pdf_font_desc *fontdesc, int cid) |
192 | 41.1M | { |
193 | 41.1M | if (fontdesc->to_ttf_cmap) |
194 | 30.2M | { |
195 | 30.2M | cid = pdf_lookup_cmap(fontdesc->to_ttf_cmap, cid); |
196 | | |
197 | | /* vertical presentation forms */ |
198 | 30.2M | if (fontdesc->font->flags.ft_substitute && fontdesc->wmode) |
199 | 360 | { |
200 | 360 | switch (cid) |
201 | 360 | { |
202 | 0 | case 0x0021: cid = 0xFE15; break; /* ! */ |
203 | 0 | case 0x0028: cid = 0xFE35; break; /* ( */ |
204 | 0 | case 0x0029: cid = 0xFE36; break; /* ) */ |
205 | 0 | case 0x002C: cid = 0xFE10; break; /* , */ |
206 | 0 | case 0x003A: cid = 0xFE13; break; /* : */ |
207 | 0 | case 0x003B: cid = 0xFE14; break; /* ; */ |
208 | 0 | case 0x003F: cid = 0xFE16; break; /* ? */ |
209 | 0 | case 0x005B: cid = 0xFE47; break; /* [ */ |
210 | 0 | case 0x005D: cid = 0xFE48; break; /* ] */ |
211 | 0 | case 0x005F: cid = 0xFE33; break; /* _ */ |
212 | 0 | case 0x007B: cid = 0xFE37; break; /* { */ |
213 | 0 | case 0x007D: cid = 0xFE38; break; /* } */ |
214 | 0 | case 0x2013: cid = 0xFE32; break; /* EN DASH */ |
215 | 0 | case 0x2014: cid = 0xFE31; break; /* EM DASH */ |
216 | 0 | case 0x2025: cid = 0xFE30; break; /* TWO DOT LEADER */ |
217 | 0 | case 0x2026: cid = 0xFE19; break; /* HORIZONTAL ELLIPSIS */ |
218 | 0 | case 0x3001: cid = 0xFE11; break; /* IDEOGRAPHIC COMMA */ |
219 | 0 | case 0x3002: cid = 0xFE12; break; /* IDEOGRAPHIC FULL STOP */ |
220 | 0 | case 0x3008: cid = 0xFE3F; break; /* OPENING ANGLE BRACKET */ |
221 | 0 | case 0x3009: cid = 0xFE40; break; /* CLOSING ANGLE BRACKET */ |
222 | 0 | case 0x300A: cid = 0xFE3D; break; /* LEFT DOUBLE ANGLE BRACKET */ |
223 | 0 | case 0x300B: cid = 0xFE3E; break; /* RIGHT DOUBLE ANGLE BRACKET */ |
224 | 0 | case 0x300C: cid = 0xFE41; break; /* LEFT CORNER BRACKET */ |
225 | 0 | case 0x300D: cid = 0xFE42; break; /* RIGHT CORNER BRACKET */ |
226 | 0 | case 0x300E: cid = 0xFE43; break; /* LEFT WHITE CORNER BRACKET */ |
227 | 0 | case 0x300F: cid = 0xFE44; break; /* RIGHT WHITE CORNER BRACKET */ |
228 | 0 | case 0x3010: cid = 0xFE3B; break; /* LEFT BLACK LENTICULAR BRACKET */ |
229 | 0 | case 0x3011: cid = 0xFE3C; break; /* RIGHT BLACK LENTICULAR BRACKET */ |
230 | 0 | case 0x3014: cid = 0xFE39; break; /* LEFT TORTOISE SHELL BRACKET */ |
231 | 0 | case 0x3015: cid = 0xFE3A; break; /* RIGHT TORTOISE SHELL BRACKET */ |
232 | 0 | case 0x3016: cid = 0xFE17; break; /* LEFT WHITE LENTICULAR BRACKET */ |
233 | 0 | case 0x3017: cid = 0xFE18; break; /* RIGHT WHITE LENTICULAR BRACKET */ |
234 | | |
235 | 0 | case 0xFF01: cid = 0xFE15; break; /* FULLWIDTH EXCLAMATION MARK */ |
236 | 0 | case 0xFF08: cid = 0xFE35; break; /* FULLWIDTH LEFT PARENTHESIS */ |
237 | 0 | case 0xFF09: cid = 0xFE36; break; /* FULLWIDTH RIGHT PARENTHESIS */ |
238 | 0 | case 0xFF0C: cid = 0xFE10; break; /* FULLWIDTH COMMA */ |
239 | 0 | case 0xFF1A: cid = 0xFE13; break; /* FULLWIDTH COLON */ |
240 | 0 | case 0xFF1B: cid = 0xFE14; break; /* FULLWIDTH SEMICOLON */ |
241 | 0 | case 0xFF1F: cid = 0xFE16; break; /* FULLWIDTH QUESTION MARK */ |
242 | 0 | case 0xFF3B: cid = 0xFE47; break; /* FULLWIDTH LEFT SQUARE BRACKET */ |
243 | 0 | case 0xFF3D: cid = 0xFE48; break; /* FULLWIDTH RIGHT SQUARE BRACKET */ |
244 | 0 | case 0xFF3F: cid = 0xFE33; break; /* FULLWIDTH LOW LINE */ |
245 | 0 | case 0xFF5B: cid = 0xFE37; break; /* FULLWIDTH LEFT CURLY BRACKET */ |
246 | 0 | case 0xFF5D: cid = 0xFE38; break; /* FULLWIDTH RIGHT CURLY BRACKET */ |
247 | | |
248 | 18 | case 0x30FC: cid = 0xFE31; break; /* KATAKANA-HIRAGANA PROLONGED SOUND MARK */ |
249 | 0 | case 0xFF0D: cid = 0xFE31; break; /* FULLWIDTH HYPHEN-MINUS */ |
250 | 360 | } |
251 | 360 | } |
252 | | |
253 | 30.2M | return ft_char_index(fontdesc->font->ft_face, cid); |
254 | 30.2M | } |
255 | | |
256 | 10.9M | if (fontdesc->cid_to_gid && (size_t)cid < fontdesc->cid_to_gid_len && cid >= 0) |
257 | 7.36M | return fontdesc->cid_to_gid[cid]; |
258 | | |
259 | 3.56M | return cid; |
260 | 10.9M | } |
261 | | |
262 | | int |
263 | | pdf_font_cid_to_gid(fz_context *ctx, pdf_font_desc *fontdesc, int cid) |
264 | 40.0M | { |
265 | 40.0M | if (fontdesc->font->ft_face) |
266 | 40.0M | { |
267 | 40.0M | int gid; |
268 | 40.0M | fz_ft_lock(ctx); |
269 | 40.0M | gid = ft_cid_to_gid(fontdesc, cid); |
270 | 40.0M | fz_ft_unlock(ctx); |
271 | 40.0M | return gid; |
272 | 40.0M | } |
273 | 66.6k | return cid; |
274 | 40.0M | } |
275 | | |
276 | | static int ft_width(fz_context *ctx, pdf_font_desc *fontdesc, int cid) |
277 | 1.13M | { |
278 | 1.13M | int mask = FT_LOAD_NO_SCALE | FT_LOAD_NO_HINTING | FT_LOAD_NO_BITMAP | FT_LOAD_IGNORE_TRANSFORM; |
279 | 1.13M | int gid = ft_cid_to_gid(fontdesc, cid); |
280 | 1.13M | FT_Fixed adv = 0; |
281 | 1.13M | int fterr; |
282 | 1.13M | FT_Face face = fontdesc->font->ft_face; |
283 | 1.13M | FT_UShort units_per_EM; |
284 | | |
285 | 1.13M | fterr = FT_Get_Advance(face, gid, mask, &adv); |
286 | 1.13M | if (fterr && fterr != FT_Err_Invalid_Argument) |
287 | 9.65k | fz_warn(ctx, "FT_Get_Advance(%d): %s", gid, ft_error_string(fterr)); |
288 | | |
289 | 1.13M | units_per_EM = face->units_per_EM; |
290 | 1.13M | if (units_per_EM == 0) |
291 | 0 | units_per_EM = 2048; |
292 | | |
293 | 1.13M | return adv * 1000 / units_per_EM; |
294 | 1.13M | } |
295 | | |
/*
 * Extra code/glyph-name pairs consulted by lookup_mre_code() before it
 * falls back to fz_glyph_name_from_mac_roman.
 * NOTE(review): presumably the glyphs where the Mac OS Roman character set
 * differs from the PDF MacRomanEncoding table -- confirm.
 */
static const struct { int code; const char *name; } mre_diff_table[] =
{
	{ 173, "notequal" },
	{ 176, "infinity" },
	{ 178, "lessequal" },
	{ 179, "greaterequal" },
	{ 182, "partialdiff" },
	{ 183, "summation" },
	{ 184, "product" },
	{ 185, "pi" },
	{ 186, "integral" },
	{ 189, "Omega" },
	{ 195, "radical" },
	{ 197, "approxequal" },
	{ 198, "Delta" },
	{ 215, "lozenge" },
	{ 219, "Euro" },
	{ 240, "apple" },
};
315 | | |
316 | | static int lookup_mre_code(const char *name) |
317 | 33.2k | { |
318 | 33.2k | int i; |
319 | 565k | for (i = 0; i < (int)nelem(mre_diff_table); ++i) |
320 | 532k | if (!strcmp(name, mre_diff_table[i].name)) |
321 | 27 | return mre_diff_table[i].code; |
322 | 4.70M | for (i = 0; i < 256; i++) |
323 | 4.69M | if (fz_glyph_name_from_mac_roman[i] && !strcmp(name, fz_glyph_name_from_mac_roman[i])) |
324 | 32.7k | return i; |
325 | 486 | return -1; |
326 | 33.2k | } |
327 | | |
328 | | static int ft_find_glyph_by_unicode_name(FT_Face face, const char *name) |
329 | 140k | { |
330 | 140k | int unicode, glyph; |
331 | | |
332 | | /* Prefer exact unicode match if available. */ |
333 | 140k | unicode = fz_unicode_from_glyph_name_strict(name); |
334 | 140k | if (unicode > 0) |
335 | 138k | { |
336 | 138k | glyph = ft_char_index(face, unicode); |
337 | 138k | if (glyph > 0) |
338 | 40.2k | return glyph; |
339 | 138k | } |
340 | | |
341 | | /* Fall back to font glyph name if we can. */ |
342 | 99.8k | glyph = ft_name_index(face, name); |
343 | 99.8k | if (glyph > 0) |
344 | 282 | return glyph; |
345 | | |
346 | | /* Fuzzy unicode match as last attempt. */ |
347 | 99.5k | unicode = fz_unicode_from_glyph_name(name); |
348 | 99.5k | if (unicode > 0) |
349 | 99.5k | return ft_char_index(face, unicode); |
350 | | |
351 | | /* Failed. */ |
352 | 0 | return 0; |
353 | 99.5k | } |
354 | | |
355 | | /* |
356 | | * Load font files. |
357 | | */ |
358 | | |
359 | | static void |
360 | | pdf_load_builtin_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int has_descriptor) |
361 | 3.89k | { |
362 | 3.89k | FT_Face face; |
363 | 3.89k | const char *clean_name = pdf_clean_font_name(fontname); |
364 | 3.89k | if (clean_name == fontname) |
365 | 2.45k | clean_name = "Times-Roman"; |
366 | | |
367 | 3.89k | fontdesc->font = fz_load_system_font(ctx, fontname, 0, 0, !has_descriptor); |
368 | 3.89k | if (!fontdesc->font) |
369 | 3.89k | { |
370 | 3.89k | const unsigned char *data; |
371 | 3.89k | int len; |
372 | | |
373 | 3.89k | data = fz_lookup_base14_font(ctx, clean_name, &len); |
374 | 3.89k | if (!data) |
375 | 0 | fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin font: '%s'", fontname); |
376 | | |
377 | 3.89k | fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1); |
378 | 3.89k | fontdesc->font->flags.is_serif = !!strstr(clean_name, "Times"); |
379 | 3.89k | } |
380 | | |
381 | 3.89k | if (!strcmp(clean_name, "Symbol") || !strcmp(clean_name, "ZapfDingbats")) |
382 | 223 | fontdesc->flags |= PDF_FD_SYMBOLIC; |
383 | | |
384 | 3.89k | face = fontdesc->font->ft_face; |
385 | 3.89k | fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM; |
386 | 3.89k | fontdesc->descent = 1000.0f * face->descender / face->units_per_EM; |
387 | 3.89k | } |
388 | | |
389 | | static void |
390 | | pdf_load_substitute_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int mono, int serif, int bold, int italic) |
391 | 3.24k | { |
392 | 3.24k | fontdesc->font = fz_load_system_font(ctx, fontname, bold, italic, 0); |
393 | 3.24k | if (!fontdesc->font) |
394 | 3.24k | { |
395 | 3.24k | const unsigned char *data; |
396 | 3.24k | int len; |
397 | | |
398 | 3.24k | data = pdf_lookup_substitute_font(ctx, mono, serif, bold, italic, &len); |
399 | 3.24k | if (!data) |
400 | 0 | fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find substitute font"); |
401 | | |
402 | 3.24k | fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1); |
403 | 3.24k | fontdesc->font->flags.fake_bold = bold && !fontdesc->font->flags.is_bold; |
404 | 3.24k | fontdesc->font->flags.fake_italic = italic && !fontdesc->font->flags.is_italic; |
405 | | |
406 | 3.24k | fontdesc->font->flags.is_mono = mono; |
407 | 3.24k | fontdesc->font->flags.is_serif = serif; |
408 | 3.24k | fontdesc->font->flags.is_bold = bold; |
409 | 3.24k | fontdesc->font->flags.is_italic = italic; |
410 | 3.24k | } |
411 | | |
412 | 3.24k | fontdesc->font->flags.ft_substitute = 1; |
413 | 3.24k | fontdesc->font->flags.ft_stretch = 1; |
414 | 3.24k | } |
415 | | |
416 | | static void |
417 | | pdf_load_substitute_cjk_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int ros, int serif) |
418 | 291 | { |
419 | 291 | fontdesc->font = fz_load_system_cjk_font(ctx, fontname, ros, serif); |
420 | 291 | if (!fontdesc->font) |
421 | 291 | { |
422 | 291 | const unsigned char *data; |
423 | 291 | int size; |
424 | 291 | int subfont; |
425 | | |
426 | 291 | data = fz_lookup_cjk_font(ctx, ros, &size, &subfont); |
427 | 291 | if (!data) |
428 | 0 | fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin CJK font"); |
429 | | |
430 | | /* A glyph bbox cache is too big for CJK fonts. */ |
431 | 291 | fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, size, subfont, 0); |
432 | 291 | } |
433 | | |
434 | 291 | fontdesc->font->flags.ft_substitute = 1; |
435 | 291 | fontdesc->font->flags.ft_stretch = 0; |
436 | 291 | fontdesc->font->flags.cjk = 1; |
437 | 291 | fontdesc->font->flags.cjk_lang = ros; |
438 | 291 | } |
439 | | |
440 | | static struct { int ros, serif; const char *name; } known_cjk_fonts[] = { |
441 | | { FZ_ADOBE_GB, 1, "SimFang" }, |
442 | | { FZ_ADOBE_GB, 0, "SimHei" }, |
443 | | { FZ_ADOBE_GB, 1, "SimKai" }, |
444 | | { FZ_ADOBE_GB, 1, "SimLi" }, |
445 | | { FZ_ADOBE_GB, 1, "SimSun" }, |
446 | | { FZ_ADOBE_GB, 1, "Song" }, |
447 | | |
448 | | { FZ_ADOBE_CNS, 1, "MingLiU" }, |
449 | | |
450 | | { FZ_ADOBE_JAPAN, 0, "Gothic" }, |
451 | | { FZ_ADOBE_JAPAN, 1, "Mincho" }, |
452 | | |
453 | | { FZ_ADOBE_KOREA, 1, "Batang" }, |
454 | | { FZ_ADOBE_KOREA, 0, "Gulim" }, |
455 | | { FZ_ADOBE_KOREA, 0, "Dotum" }, |
456 | | }; |
457 | | |
/* Return 1 if ref occurs anywhere inside s, 0 otherwise. */
static int match_font_name(const char *s, const char *ref)
{
	const char *hit = strstr(s, ref);

	return hit != NULL ? 1 : 0;
}
462 | | |
463 | | static void |
464 | | pdf_load_system_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, const char *collection) |
465 | 3.53k | { |
466 | 3.53k | int bold = 0; |
467 | 3.53k | int italic = 0; |
468 | 3.53k | int serif = 0; |
469 | 3.53k | int mono = 0; |
470 | | |
471 | 3.53k | if (strstr(fontname, "Bold")) |
472 | 548 | bold = 1; |
473 | 3.53k | if (strstr(fontname, "Italic")) |
474 | 76 | italic = 1; |
475 | 3.53k | if (strstr(fontname, "Oblique")) |
476 | 0 | italic = 1; |
477 | | |
478 | 3.53k | if (fontdesc->flags & PDF_FD_FIXED_PITCH) |
479 | 93 | mono = 1; |
480 | 3.53k | if (fontdesc->flags & PDF_FD_SERIF) |
481 | 546 | serif = 1; |
482 | 3.53k | if (fontdesc->flags & PDF_FD_ITALIC) |
483 | 132 | italic = 1; |
484 | 3.53k | if (fontdesc->flags & PDF_FD_FORCE_BOLD) |
485 | 27 | bold = 1; |
486 | | |
487 | 3.53k | if (collection) |
488 | 1.21k | { |
489 | 1.21k | if (!strcmp(collection, "Adobe-CNS1")) |
490 | 42 | pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_CNS, serif); |
491 | 1.16k | else if (!strcmp(collection, "Adobe-GB1")) |
492 | 26 | pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_GB, serif); |
493 | 1.14k | else if (!strcmp(collection, "Adobe-Japan1")) |
494 | 136 | pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_JAPAN, serif); |
495 | 1.00k | else if (!strcmp(collection, "Adobe-Korea1")) |
496 | 47 | pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_KOREA, serif); |
497 | 959 | else |
498 | 959 | { |
499 | 959 | size_t i; |
500 | 959 | if (strcmp(collection, "Adobe-Identity") != 0) |
501 | 206 | fz_warn(ctx, "unknown cid collection: %s", collection); |
502 | | |
503 | | // Recognize common CJK fonts when using Identity or other non-CJK CMap |
504 | 12.2k | for (i = 0; i < nelem(known_cjk_fonts); ++i) |
505 | 11.3k | { |
506 | 11.3k | if (match_font_name(fontname, known_cjk_fonts[i].name)) |
507 | 40 | { |
508 | 40 | pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, |
509 | 40 | known_cjk_fonts[i].ros, known_cjk_fonts[i].serif); |
510 | 40 | return; |
511 | 40 | } |
512 | 11.3k | } |
513 | | |
514 | 919 | pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic); |
515 | 919 | } |
516 | 1.21k | } |
517 | 2.32k | else |
518 | 2.32k | { |
519 | 2.32k | pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic); |
520 | 2.32k | } |
521 | 3.53k | } |
522 | | |
/*
 * Read a big-endian 16-bit / 32-bit unsigned value from a byte pointer.
 * Every byte is converted to the unsigned result type before it is shifted,
 * so a byte >= 0x80 is never shifted into the sign bit of a promoted int.
 * The argument is evaluated more than once; pass a side-effect free expression.
 */
#define TTF_U16(p) ((uint16_t) (((uint16_t)(unsigned char)(p)[0] << 8) | (uint16_t)(unsigned char)(p)[1]))
#define TTF_U32(p) ( \
	((uint32_t)(unsigned char)(p)[0] << 24) | \
	((uint32_t)(unsigned char)(p)[1] << 16) | \
	((uint32_t)(unsigned char)(p)[2] << 8) | \
	((uint32_t)(unsigned char)(p)[3]) )
525 | | |
526 | | static fz_buffer * |
527 | | pdf_extract_cff_subtable(fz_context *ctx, unsigned char *data, size_t size) |
528 | 42 | { |
529 | 42 | size_t num_tables = TTF_U16(data + 4); |
530 | 42 | size_t i; |
531 | | |
532 | 42 | if (12 + num_tables * 16 > size) |
533 | 4 | fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid TTF header"); |
534 | | |
535 | 308 | for (i = 0; i < num_tables; ++i) |
536 | 306 | { |
537 | 306 | unsigned char *record = data + 12 + i * 16; |
538 | 306 | if (!memcmp("CFF ", record, 4)) |
539 | 36 | { |
540 | 36 | uint64_t offset = TTF_U32(record + 8); |
541 | 36 | uint64_t length = TTF_U32(record + 12); |
542 | 36 | uint64_t end = offset + length; |
543 | 36 | if (end > size) |
544 | 7 | fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid TTF subtable offset/length"); |
545 | 29 | return fz_new_buffer_from_copied_data(ctx, data + offset, length); |
546 | 36 | } |
547 | 306 | } |
548 | | |
549 | 2 | return NULL; |
550 | 38 | } |
551 | | |
552 | | static void |
553 | | pdf_load_embedded_font(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, const char *fontname, pdf_obj *stmref) |
554 | 7.26k | { |
555 | 7.26k | fz_buffer *buf; |
556 | 7.26k | unsigned char *data; |
557 | 7.26k | size_t size; |
558 | | |
559 | 7.26k | fz_var(buf); |
560 | | |
561 | 7.26k | buf = pdf_load_stream(ctx, stmref); |
562 | | |
563 | 13.5k | fz_try(ctx) |
564 | 13.5k | { |
565 | | /* Extract CFF subtable for OpenType fonts: */ |
566 | 6.75k | size = fz_buffer_storage(ctx, buf, &data); |
567 | 6.75k | if (size > 12) { |
568 | 6.45k | if (!memcmp("OTTO", data, 4)) { |
569 | 42 | fz_buffer *cff = pdf_extract_cff_subtable(ctx, data, size); |
570 | 42 | if (cff) |
571 | 29 | { |
572 | 29 | fz_drop_buffer(ctx, buf); |
573 | 29 | buf = cff; |
574 | 29 | } |
575 | 42 | } |
576 | 6.45k | } |
577 | | |
578 | 6.75k | fontdesc->font = fz_new_font_from_buffer(ctx, fontname, buf, 0, 1); |
579 | 6.75k | } |
580 | 13.5k | fz_always(ctx) |
581 | 6.75k | fz_drop_buffer(ctx, buf); |
582 | 6.75k | fz_catch(ctx) |
583 | 2.23k | fz_rethrow(ctx); |
584 | | |
585 | 5.02k | fontdesc->size += fz_buffer_storage(ctx, buf, NULL); |
586 | 5.02k | fontdesc->is_embedded = 1; |
587 | 5.02k | } |
588 | | |
589 | | /* |
590 | | * Create and destroy |
591 | | */ |
592 | | |
593 | | pdf_font_desc * |
594 | | pdf_keep_font(fz_context *ctx, pdf_font_desc *fontdesc) |
595 | 232k | { |
596 | 232k | return fz_keep_storable(ctx, &fontdesc->storable); |
597 | 232k | } |
598 | | |
599 | | void |
600 | | pdf_drop_font(fz_context *ctx, pdf_font_desc *fontdesc) |
601 | 844k | { |
602 | 844k | fz_drop_storable(ctx, &fontdesc->storable); |
603 | 844k | } |
604 | | |
605 | | static int |
606 | | pdf_font_is_droppable(fz_context *ctx, fz_storable *fontdesc) |
607 | 5 | { |
608 | | /* If we aren't holding the FT lock, then we can drop. */ |
609 | 5 | return !fz_ft_lock_held(ctx); |
610 | 5 | } |
611 | | |
612 | | static void |
613 | | pdf_drop_font_imp(fz_context *ctx, fz_storable *fontdesc_) |
614 | 12.4k | { |
615 | 12.4k | pdf_font_desc *fontdesc = (pdf_font_desc *)fontdesc_; |
616 | | |
617 | 12.4k | fz_drop_font(ctx, fontdesc->font); |
618 | 12.4k | pdf_drop_cmap(ctx, fontdesc->encoding); |
619 | 12.4k | pdf_drop_cmap(ctx, fontdesc->to_ttf_cmap); |
620 | 12.4k | pdf_drop_cmap(ctx, fontdesc->to_unicode); |
621 | 12.4k | fz_free(ctx, fontdesc->cid_to_gid); |
622 | 12.4k | fz_free(ctx, fontdesc->cid_to_ucs); |
623 | 12.4k | fz_free(ctx, fontdesc->hmtx); |
624 | 12.4k | fz_free(ctx, fontdesc->vmtx); |
625 | 12.4k | fz_free(ctx, fontdesc); |
626 | 12.4k | } |
627 | | |
628 | | pdf_font_desc * |
629 | | pdf_new_font_desc(fz_context *ctx) |
630 | 12.4k | { |
631 | 12.4k | pdf_font_desc *fontdesc; |
632 | | |
633 | 12.4k | fontdesc = fz_malloc_struct(ctx, pdf_font_desc); |
634 | 12.4k | FZ_INIT_AWKWARD_STORABLE(fontdesc, 1, pdf_drop_font_imp, pdf_font_is_droppable); |
635 | 12.4k | fontdesc->size = sizeof(pdf_font_desc); |
636 | | |
637 | 12.4k | fontdesc->font = NULL; |
638 | | |
639 | 12.4k | fontdesc->flags = 0; |
640 | 12.4k | fontdesc->italic_angle = 0; |
641 | 12.4k | fontdesc->ascent = 800; |
642 | 12.4k | fontdesc->descent = -200; |
643 | 12.4k | fontdesc->cap_height = 800; |
644 | 12.4k | fontdesc->x_height = 500; |
645 | 12.4k | fontdesc->missing_width = 0; |
646 | | |
647 | 12.4k | fontdesc->encoding = NULL; |
648 | 12.4k | fontdesc->to_ttf_cmap = NULL; |
649 | 12.4k | fontdesc->cid_to_gid_len = 0; |
650 | 12.4k | fontdesc->cid_to_gid = NULL; |
651 | | |
652 | 12.4k | fontdesc->to_unicode = NULL; |
653 | 12.4k | fontdesc->cid_to_ucs_len = 0; |
654 | 12.4k | fontdesc->cid_to_ucs = NULL; |
655 | | |
656 | 12.4k | fontdesc->wmode = 0; |
657 | | |
658 | 12.4k | fontdesc->hmtx_cap = 0; |
659 | 12.4k | fontdesc->vmtx_cap = 0; |
660 | 12.4k | fontdesc->hmtx_len = 0; |
661 | 12.4k | fontdesc->vmtx_len = 0; |
662 | 12.4k | fontdesc->hmtx = NULL; |
663 | 12.4k | fontdesc->vmtx = NULL; |
664 | | |
665 | 12.4k | fontdesc->dhmtx.lo = 0x0000; |
666 | 12.4k | fontdesc->dhmtx.hi = 0xFFFF; |
667 | 12.4k | fontdesc->dhmtx.w = 1000; |
668 | | |
669 | 12.4k | fontdesc->dvmtx.lo = 0x0000; |
670 | 12.4k | fontdesc->dvmtx.hi = 0xFFFF; |
671 | 12.4k | fontdesc->dvmtx.x = 0; |
672 | 12.4k | fontdesc->dvmtx.y = 880; |
673 | 12.4k | fontdesc->dvmtx.w = -1000; |
674 | | |
675 | 12.4k | fontdesc->is_embedded = 0; |
676 | | |
677 | 12.4k | return fontdesc; |
678 | 12.4k | } |
679 | | |
680 | | /* |
681 | | * Simple fonts (Type1 and TrueType) |
682 | | */ |
683 | | |
684 | | static FT_CharMap |
685 | | select_type1_cmap(FT_Face face) |
686 | 7.80k | { |
687 | 7.80k | int i; |
688 | 15.5k | for (i = 0; i < face->num_charmaps; i++) |
689 | 15.5k | if (face->charmaps[i]->platform_id == 7) |
690 | 7.79k | return face->charmaps[i]; |
691 | 6 | if (face->num_charmaps > 0) |
692 | 1 | return face->charmaps[0]; |
693 | 5 | return NULL; |
694 | 6 | } |
695 | | |
696 | | static FT_CharMap |
697 | | select_truetype_cmap(fz_context *ctx, FT_Face face, int symbolic) |
698 | 1.98k | { |
699 | 1.98k | int i; |
700 | | |
701 | | /* First look for a Microsoft symbolic cmap, if applicable */ |
702 | 1.98k | if (symbolic) |
703 | 715 | { |
704 | 3.04k | for (i = 0; i < face->num_charmaps; i++) |
705 | 2.40k | if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 0) |
706 | 69 | return face->charmaps[i]; |
707 | 715 | } |
708 | | |
709 | 1.91k | fz_ft_lock(ctx); |
710 | | |
711 | | /* Then look for a Microsoft Unicode cmap */ |
712 | 5.39k | for (i = 0; i < face->num_charmaps; i++) |
713 | 4.14k | if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 1) |
714 | 788 | if (FT_Get_CMap_Format(face->charmaps[i]) != -1) |
715 | 674 | { |
716 | 674 | fz_ft_unlock(ctx); |
717 | 674 | return face->charmaps[i]; |
718 | 674 | } |
719 | | |
720 | | /* Finally look for an Apple MacRoman cmap */ |
721 | 3.03k | for (i = 0; i < face->num_charmaps; i++) |
722 | 2.56k | if (face->charmaps[i]->platform_id == 1 && face->charmaps[i]->encoding_id == 0) |
723 | 773 | if (FT_Get_CMap_Format(face->charmaps[i]) != -1) |
724 | 773 | { |
725 | 773 | fz_ft_unlock(ctx); |
726 | 773 | return face->charmaps[i]; |
727 | 773 | } |
728 | | |
729 | 471 | if (face->num_charmaps > 0) |
730 | 57 | if (FT_Get_CMap_Format(face->charmaps[0]) != -1) |
731 | 33 | { |
732 | 33 | fz_ft_unlock(ctx); |
733 | 33 | return face->charmaps[0]; |
734 | 33 | } |
735 | | |
736 | 438 | fz_ft_unlock(ctx); |
737 | 438 | return NULL; |
738 | 471 | } |
739 | | |
740 | | static FT_CharMap |
741 | | select_unknown_cmap(FT_Face face) |
742 | 0 | { |
743 | 0 | if (face->num_charmaps > 0) |
744 | 0 | return face->charmaps[0]; |
745 | 0 | return NULL; |
746 | 0 | } |
747 | | |
748 | | static int use_s22pdf_workaround(fz_context *ctx, pdf_obj *dict, pdf_obj *descriptor) |
749 | 9.78k | { |
750 | 9.78k | if (descriptor) |
751 | 6.29k | { |
752 | 6.29k | if (pdf_dict_get(ctx, dict, PDF_NAME(Encoding)) != PDF_NAME(WinAnsiEncoding)) |
753 | 4.32k | return 0; |
754 | 1.96k | if (pdf_dict_get_int(ctx, descriptor, PDF_NAME(Flags)) != 4) |
755 | 1.82k | return 0; |
756 | 142 | return 1; |
757 | 1.96k | } |
758 | 3.49k | return 0; |
759 | 9.78k | } |
760 | | |
761 | | static pdf_font_desc * |
762 | | pdf_load_simple_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict) |
763 | 9.78k | { |
764 | 9.78k | const char *basefont; |
765 | 9.78k | pdf_obj *descriptor; |
766 | 9.78k | pdf_obj *encoding; |
767 | 9.78k | pdf_obj *widths; |
768 | 9.78k | unsigned short *etable = NULL; |
769 | 9.78k | pdf_font_desc *fontdesc = NULL; |
770 | 9.78k | pdf_obj *subtype; |
771 | 9.78k | FT_Face face; |
772 | 9.78k | FT_CharMap cmap; |
773 | 9.78k | int symbolic; |
774 | 9.78k | int kind; |
775 | 9.78k | int glyph; |
776 | | |
777 | 9.78k | const char *estrings[256]; |
778 | 9.78k | char ebuffer[256][32]; |
779 | 9.78k | int i, k, n; |
780 | 9.78k | int fterr; |
781 | 9.78k | int has_lock = 0; |
782 | | |
783 | 9.78k | fz_var(fontdesc); |
784 | 9.78k | fz_var(etable); |
785 | 9.78k | fz_var(has_lock); |
786 | | |
787 | | /* Load font file */ |
788 | 19.5k | fz_try(ctx) |
789 | 19.5k | { |
790 | 9.78k | fontdesc = pdf_new_font_desc(ctx); |
791 | | |
792 | 9.78k | basefont = pdf_dict_get_name(ctx, dict, PDF_NAME(BaseFont)); |
793 | | |
794 | 9.78k | descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor)); |
795 | 9.78k | if (descriptor) |
796 | 6.29k | pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, NULL, basefont, 0); |
797 | 3.49k | else |
798 | 3.49k | pdf_load_builtin_font(ctx, fontdesc, basefont, 0); |
799 | | |
800 | | /* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */ |
801 | 9.78k | if (use_s22pdf_workaround(ctx, dict, descriptor)) |
802 | 142 | { |
803 | 142 | char *cp936fonts[] = { |
804 | 142 | "\xCB\xCE\xCC\xE5", "SimSun,Regular", |
805 | 142 | "\xBA\xDA\xCC\xE5", "SimHei,Regular", |
806 | 142 | "\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular", |
807 | 142 | "\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular", |
808 | 142 | "\xC1\xA5\xCA\xE9", "SimLi,Regular", |
809 | 142 | NULL |
810 | 142 | }; |
811 | 852 | for (i = 0; cp936fonts[i]; i += 2) |
812 | 710 | if (!strcmp(basefont, cp936fonts[i])) |
813 | 0 | break; |
814 | 142 | if (cp936fonts[i]) |
815 | 0 | { |
816 | 0 | fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings"); |
817 | 0 | pdf_drop_font(ctx, fontdesc); |
818 | 0 | fontdesc = NULL; |
819 | 0 | fontdesc = pdf_new_font_desc(ctx); |
820 | 0 | pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, "Adobe-GB1", cp936fonts[i+1], 0); |
821 | 0 | fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H"); |
822 | 0 | fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); |
823 | 0 | fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); |
824 | |
|
825 | 0 | goto skip_encoding; |
826 | 0 | } |
827 | 142 | } |
828 | | |
829 | 9.78k | face = fontdesc->font->ft_face; |
830 | 9.78k | kind = ft_kind(ctx, face); |
831 | | |
832 | | /* Encoding */ |
833 | | |
834 | 9.78k | symbolic = fontdesc->flags & 4; |
835 | | /* Bug 703273: If non-symbolic, we're not symbolic. */ |
836 | 9.78k | if (fontdesc->flags & 32) |
837 | 2.69k | symbolic = 0; |
838 | | |
839 | 9.78k | if (kind == TYPE1) |
840 | 7.80k | cmap = select_type1_cmap(face); |
841 | 1.98k | else if (kind == TRUETYPE) |
842 | 1.98k | cmap = select_truetype_cmap(ctx, face, symbolic); |
843 | 0 | else |
844 | 0 | cmap = select_unknown_cmap(face); |
845 | | |
846 | 9.78k | if (cmap) |
847 | 9.34k | { |
848 | 9.34k | fz_ft_lock(ctx); |
849 | 9.34k | fterr = FT_Set_Charmap(face, cmap); |
850 | 9.34k | fz_ft_unlock(ctx); |
851 | 9.34k | if (fterr) |
852 | 0 | fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr)); |
853 | 9.34k | } |
854 | 443 | else |
855 | 443 | fz_warn(ctx, "freetype could not find any cmaps"); |
856 | | |
857 | | /* FIXME: etable may leak on error. */ |
858 | 9.78k | etable = Memento_label(fz_malloc_array(ctx, 256, unsigned short), "cid_to_gid"); |
859 | 9.78k | fontdesc->size += 256 * sizeof(unsigned short); |
860 | 2.51M | for (i = 0; i < 256; i++) |
861 | 2.50M | { |
862 | 2.50M | estrings[i] = NULL; |
863 | 2.50M | etable[i] = 0; |
864 | 2.50M | } |
865 | | |
866 | 9.78k | encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding)); |
867 | 9.78k | if (encoding) |
868 | 5.36k | { |
869 | 5.36k | if (pdf_is_name(ctx, encoding)) |
870 | 3.40k | pdf_load_encoding(estrings, pdf_to_name(ctx, encoding)); |
871 | | |
872 | 5.36k | if (pdf_is_dict(ctx, encoding)) |
873 | 1.69k | { |
874 | 1.69k | pdf_obj *base, *diff, *item; |
875 | | |
876 | 1.69k | base = pdf_dict_get(ctx, encoding, PDF_NAME(BaseEncoding)); |
877 | 1.69k | if (pdf_is_name(ctx, base)) |
878 | 152 | pdf_load_encoding(estrings, pdf_to_name(ctx, base)); |
879 | 1.54k | else if (!fontdesc->is_embedded && !symbolic) |
880 | 348 | pdf_load_encoding(estrings, "StandardEncoding"); |
881 | | |
882 | 1.69k | diff = pdf_dict_get(ctx, encoding, PDF_NAME(Differences)); |
883 | 1.69k | if (pdf_is_array(ctx, diff)) |
884 | 1.65k | { |
885 | 1.65k | n = pdf_array_len(ctx, diff); |
886 | 1.65k | k = 0; |
887 | 125k | for (i = 0; i < n; i++) |
888 | 123k | { |
889 | 123k | item = pdf_array_get(ctx, diff, i); |
890 | 123k | if (pdf_is_int(ctx, item)) |
891 | 11.9k | k = pdf_to_int(ctx, item); |
892 | 123k | if (pdf_is_name(ctx, item) && k >= 0 && k < (int)nelem(estrings)) |
893 | 110k | estrings[k++] = pdf_to_name(ctx, item); |
894 | 123k | } |
895 | 1.65k | } |
896 | 1.69k | } |
897 | 5.36k | } |
898 | 4.42k | else if (!fontdesc->is_embedded && !symbolic) |
899 | 2.67k | pdf_load_encoding(estrings, "StandardEncoding"); |
900 | | |
901 | 9.78k | fz_ft_lock(ctx); |
902 | 9.78k | has_lock = 1; |
903 | | |
904 | | /* start with the builtin encoding */ |
905 | 2.51M | for (i = 0; i < 256; i++) |
906 | 2.50M | etable[i] = ft_char_index(face, i); |
907 | | |
908 | | /* built-in and substitute fonts may be a different type than what the document expects */ |
909 | 9.78k | subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype)); |
910 | 9.78k | if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1))) |
911 | 4.01k | kind = TYPE1; |
912 | 5.77k | else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1))) |
913 | 179 | kind = TYPE1; |
914 | 5.59k | else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType))) |
915 | 2.74k | kind = TRUETYPE; |
916 | 2.85k | else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0))) |
917 | 0 | kind = TYPE1; |
918 | 2.85k | else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2))) |
919 | 14 | kind = TRUETYPE; |
920 | | |
921 | | /* encode by glyph name where we can */ |
922 | 9.78k | if (kind == TYPE1) |
923 | 6.84k | { |
924 | 1.75M | for (i = 0; i < 256; i++) |
925 | 1.75M | { |
926 | 1.75M | if (estrings[i]) |
927 | 864k | { |
928 | 864k | glyph = ft_name_index(face, estrings[i]); |
929 | 864k | if (glyph > 0) |
930 | 737k | etable[i] = glyph; |
931 | 864k | } |
932 | 1.75M | } |
933 | 6.84k | } |
934 | | |
935 | | /* encode by glyph name where we can */ |
936 | 9.78k | if (kind == TRUETYPE) |
937 | 2.94k | { |
938 | | /* Unicode cmap */ |
939 | 2.94k | if (!symbolic && face->charmap && face->charmap->platform_id == 3) |
940 | 681 | { |
941 | 175k | for (i = 0; i < 256; i++) |
942 | 174k | { |
943 | 174k | if (estrings[i]) |
944 | 140k | { |
945 | 140k | glyph = ft_find_glyph_by_unicode_name(face, estrings[i]); |
946 | 140k | if (glyph > 0) |
947 | 41.7k | etable[i] = glyph; |
948 | 140k | } |
949 | 174k | } |
950 | 681 | } |
951 | | |
952 | | /* MacRoman cmap */ |
953 | 2.26k | else if (!symbolic && face->charmap && face->charmap->platform_id == 1) |
954 | 288 | { |
955 | 74.0k | for (i = 0; i < 256; i++) |
956 | 73.7k | { |
957 | 73.7k | if (estrings[i]) |
958 | 33.2k | { |
959 | 33.2k | int mrcode = lookup_mre_code(estrings[i]); |
960 | 33.2k | glyph = 0; |
961 | 33.2k | if (mrcode > 0) |
962 | 32.8k | glyph = ft_char_index(face, mrcode); |
963 | 33.2k | if (glyph == 0) |
964 | 26.6k | glyph = ft_name_index(face, estrings[i]); |
965 | 33.2k | if (glyph > 0) |
966 | 6.76k | etable[i] = glyph; |
967 | 33.2k | } |
968 | 73.7k | } |
969 | 288 | } |
970 | | |
971 | | /* Symbolic cmap */ |
972 | 1.97k | else if (!face->charmap || face->charmap->encoding != FT_ENCODING_MS_SYMBOL) |
973 | 1.90k | { |
974 | 490k | for (i = 0; i < 256; i++) |
975 | 488k | { |
976 | 488k | if (estrings[i]) |
977 | 226k | { |
978 | 226k | glyph = ft_name_index(face, estrings[i]); |
979 | 226k | if (glyph > 0) |
980 | 180k | etable[i] = glyph; |
981 | 226k | } |
982 | 488k | } |
983 | 1.90k | } |
984 | 2.94k | } |
985 | | |
986 | | /* try to reverse the glyph names from the builtin encoding */ |
987 | 2.51M | for (i = 0; i < 256; i++) |
988 | 2.50M | { |
989 | 2.50M | if (etable[i] && !estrings[i]) |
990 | 293k | { |
991 | 293k | if (FT_HAS_GLYPH_NAMES(face)) |
992 | 235k | { |
993 | 235k | fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32); |
994 | 235k | if (fterr) |
995 | 0 | fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr)); |
996 | 235k | if (ebuffer[i][0]) |
997 | 235k | estrings[i] = ebuffer[i]; |
998 | 235k | } |
999 | 57.3k | else |
1000 | 57.3k | { |
1001 | 57.3k | estrings[i] = (char*) fz_glyph_name_from_win_ansi[i]; /* discard const */ |
1002 | 57.3k | } |
1003 | 293k | } |
1004 | 2.50M | } |
1005 | | |
1006 | | /* symbolic Type 1 fonts with an implicit encoding and non-standard glyph names */ |
1007 | 9.78k | if (kind == TYPE1 && symbolic) |
1008 | 2.02k | { |
1009 | 521k | for (i = 0; i < 256; i++) |
1010 | 519k | if (etable[i] && estrings[i] && !fz_unicode_from_glyph_name(estrings[i])) |
1011 | 0 | estrings[i] = (char*) fz_glyph_name_from_adobe_standard[i]; |
1012 | 2.02k | } |
1013 | | |
1014 | 9.78k | fz_ft_unlock(ctx); |
1015 | 9.78k | has_lock = 0; |
1016 | | |
1017 | 9.78k | fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); |
1018 | 9.78k | fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); |
1019 | 9.78k | fontdesc->cid_to_gid_len = 256; |
1020 | 9.78k | fontdesc->cid_to_gid = etable; |
1021 | | |
1022 | 19.5k | fz_try(ctx) |
1023 | 19.5k | { |
1024 | 9.78k | pdf_load_to_unicode(ctx, doc, fontdesc, estrings, NULL, pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode))); |
1025 | 9.78k | } |
1026 | 19.5k | fz_catch(ctx) |
1027 | 1 | { |
1028 | 1 | fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); |
1029 | 1 | fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); |
1030 | 1 | fz_report_error(ctx); |
1031 | 1 | fz_warn(ctx, "cannot load ToUnicode CMap"); |
1032 | 1 | } |
1033 | | |
1034 | 9.78k | skip_encoding: |
1035 | | |
1036 | | /* Widths */ |
1037 | | |
1038 | 9.78k | pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width); |
1039 | | |
1040 | 9.78k | widths = pdf_dict_get(ctx, dict, PDF_NAME(Widths)); |
1041 | 9.78k | if (widths) |
1042 | 5.36k | { |
1043 | 5.36k | int first, last; |
1044 | | |
1045 | 5.36k | first = pdf_dict_get_int(ctx, dict, PDF_NAME(FirstChar)); |
1046 | 5.36k | last = pdf_dict_get_int(ctx, dict, PDF_NAME(LastChar)); |
1047 | | |
1048 | 5.36k | if (first < 0 || last > 255 || first > last) |
1049 | 195 | first = last = 0; |
1050 | | |
1051 | 499k | for (i = 0; i < last - first + 1; i++) |
1052 | 494k | { |
1053 | 494k | int wid = pdf_array_get_int(ctx, widths, i); |
1054 | 494k | pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid); |
1055 | 494k | } |
1056 | 5.36k | } |
1057 | 4.42k | else |
1058 | 4.42k | { |
1059 | 4.42k | fz_ft_lock(ctx); |
1060 | 4.42k | has_lock = 1; |
1061 | 1.13M | for (i = 0; i < 256; i++) |
1062 | 1.13M | pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i)); |
1063 | 4.42k | fz_ft_unlock(ctx); |
1064 | 4.42k | has_lock = 0; |
1065 | 4.42k | } |
1066 | | |
1067 | 9.78k | pdf_end_hmtx(ctx, fontdesc); |
1068 | 9.78k | } |
1069 | 19.5k | fz_catch(ctx) |
1070 | 0 | { |
1071 | 0 | if (has_lock) |
1072 | 0 | fz_ft_unlock(ctx); |
1073 | 0 | if (fontdesc && etable != fontdesc->cid_to_gid) |
1074 | 0 | fz_free(ctx, etable); |
1075 | 0 | pdf_drop_font(ctx, fontdesc); |
1076 | 0 | fz_rethrow(ctx); |
1077 | 0 | } |
1078 | 9.78k | return fontdesc; |
1079 | 9.78k | } |
1080 | | |
1081 | | static int |
1082 | | hail_mary_make_hash_key(fz_context *ctx, fz_store_hash *hash, void *key_) |
1083 | 36.6k | { |
1084 | 36.6k | hash->u.pi.i = 0; |
1085 | 36.6k | hash->u.pi.ptr = NULL; |
1086 | 36.6k | return 1; |
1087 | 36.6k | } |
1088 | | |
1089 | | static void * |
1090 | | hail_mary_keep_key(fz_context *ctx, void *key) |
1091 | 2.31k | { |
1092 | 2.31k | return key; |
1093 | 2.31k | } |
1094 | | |
1095 | | static void |
1096 | | hail_mary_drop_key(fz_context *ctx, void *key) |
1097 | 2.31k | { |
1098 | 2.31k | } |
1099 | | |
1100 | | static int |
1101 | | hail_mary_cmp_key(fz_context *ctx, void *k0, void *k1) |
1102 | 0 | { |
1103 | 0 | return k0 == k1; |
1104 | 0 | } |
1105 | | |
1106 | | static void |
1107 | | hail_mary_format_key(fz_context *ctx, char *s, size_t n, void *key_) |
1108 | 0 | { |
1109 | 0 | fz_strlcpy(s, "(hail mary font)", n); |
1110 | 0 | } |
1111 | | |
/* Only the address of this variable is used: it serves as the single,
 * unique key under which the hail-mary font is kept in the store. */
static int hail_mary_store_key; /* Dummy */

/* Store type describing the hail-mary font entry: a type name followed by
 * the key callbacks defined above. The final slot is left NULL. */
static const fz_store_type hail_mary_store_type =
{
	"hail-mary",
	hail_mary_make_hash_key,
	hail_mary_keep_key,
	hail_mary_drop_key,
	hail_mary_cmp_key,
	hail_mary_format_key,
	NULL
};
1124 | | |
1125 | | pdf_font_desc * |
1126 | | pdf_load_hail_mary_font(fz_context *ctx, pdf_document *doc) |
1127 | 31.9k | { |
1128 | 31.9k | pdf_font_desc *fontdesc; |
1129 | 31.9k | pdf_font_desc *existing; |
1130 | | |
1131 | 31.9k | if ((fontdesc = fz_find_item(ctx, pdf_drop_font_imp, &hail_mary_store_key, &hail_mary_store_type)) != NULL) |
1132 | 29.6k | { |
1133 | 29.6k | return fontdesc; |
1134 | 29.6k | } |
1135 | | |
1136 | | /* FIXME: Get someone with a clue about fonts to fix this */ |
1137 | 2.31k | fontdesc = pdf_load_simple_font(ctx, doc, NULL); |
1138 | | |
1139 | 2.31k | existing = fz_store_item(ctx, &hail_mary_store_key, fontdesc, fontdesc->size, &hail_mary_store_type); |
1140 | 2.31k | assert(existing == NULL); |
1141 | 2.31k | (void)existing; /* Silence warning in release builds */ |
1142 | | |
1143 | 2.31k | return fontdesc; |
1144 | 2.31k | } |
1145 | | |
1146 | | /* |
1147 | | * CID Fonts |
1148 | | */ |
1149 | | |
1150 | | static pdf_font_desc * |
1151 | | load_cid_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode) |
1152 | 2.46k | { |
1153 | 2.46k | pdf_obj *widths; |
1154 | 2.46k | pdf_obj *descriptor; |
1155 | 2.46k | pdf_font_desc *fontdesc = NULL; |
1156 | 2.46k | fz_buffer *buf = NULL; |
1157 | 2.46k | pdf_cmap *cmap; |
1158 | 2.46k | FT_Face face; |
1159 | 2.46k | char collection[256]; |
1160 | 2.46k | const char *basefont; |
1161 | 2.46k | int i, k, fterr; |
1162 | 2.46k | pdf_obj *cidtogidmap; |
1163 | 2.46k | pdf_obj *obj; |
1164 | 2.46k | int dw; |
1165 | | |
1166 | 2.46k | fz_var(fontdesc); |
1167 | 2.46k | fz_var(buf); |
1168 | | |
1169 | 4.93k | fz_try(ctx) |
1170 | 4.93k | { |
1171 | | /* Get font name and CID collection */ |
1172 | | |
1173 | 2.46k | basefont = pdf_dict_get_name(ctx, dict, PDF_NAME(BaseFont)); |
1174 | | |
1175 | 2.46k | { |
1176 | 2.46k | pdf_obj *cidinfo; |
1177 | 2.46k | const char *reg, *ord; |
1178 | | |
1179 | 2.46k | cidinfo = pdf_dict_get(ctx, dict, PDF_NAME(CIDSystemInfo)); |
1180 | 2.46k | if (cidinfo) |
1181 | 2.29k | { |
1182 | 2.29k | reg = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Registry), NULL); |
1183 | 2.29k | ord = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Ordering), NULL); |
1184 | 2.29k | fz_snprintf(collection, sizeof collection, "%s-%s", reg, ord); |
1185 | 2.29k | } |
1186 | 173 | else |
1187 | 173 | { |
1188 | 173 | fz_warn(ctx, "CIDFont is missing CIDSystemInfo dictionary; assuming Adobe-Identity"); |
1189 | 173 | fz_strlcpy(collection, "Adobe-Identity", sizeof collection); |
1190 | 173 | } |
1191 | 2.46k | } |
1192 | | |
1193 | | /* Encoding */ |
1194 | | |
1195 | 2.46k | if (pdf_is_name(ctx, encoding)) |
1196 | 2.13k | { |
1197 | 2.13k | cmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, encoding)); |
1198 | 2.13k | } |
1199 | 331 | else if (pdf_is_indirect(ctx, encoding)) |
1200 | 111 | { |
1201 | 111 | cmap = pdf_load_embedded_cmap(ctx, doc, encoding); |
1202 | 111 | } |
1203 | 220 | else |
1204 | 220 | { |
1205 | 220 | fz_throw(ctx, FZ_ERROR_SYNTAX, "font missing encoding"); |
1206 | 220 | } |
1207 | | |
1208 | | /* Load font file */ |
1209 | | |
1210 | 2.24k | fontdesc = pdf_new_font_desc(ctx); |
1211 | | |
1212 | 2.24k | fontdesc->encoding = cmap; |
1213 | 2.24k | fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); |
1214 | | |
1215 | 2.24k | pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding)); |
1216 | | |
1217 | 2.24k | descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor)); |
1218 | 2.24k | if (!descriptor) |
1219 | 42 | fz_throw(ctx, FZ_ERROR_SYNTAX, "missing font descriptor"); |
1220 | 2.20k | pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, collection, basefont, 1); |
1221 | | |
1222 | 2.20k | face = fontdesc->font->ft_face; |
1223 | | |
1224 | | /* Apply encoding */ |
1225 | | |
1226 | 2.20k | cidtogidmap = pdf_dict_get(ctx, dict, PDF_NAME(CIDToGIDMap)); |
1227 | 2.20k | if (pdf_is_stream(ctx, cidtogidmap)) |
1228 | 143 | { |
1229 | 143 | size_t z, len; |
1230 | 143 | unsigned char *data; |
1231 | | |
1232 | 143 | buf = pdf_load_stream(ctx, cidtogidmap); |
1233 | | |
1234 | 143 | len = fz_buffer_storage(ctx, buf, &data); |
1235 | 143 | fontdesc->cid_to_gid_len = len / 2; |
1236 | 143 | fontdesc->cid_to_gid = Memento_label(fz_malloc_array(ctx, fontdesc->cid_to_gid_len, unsigned short), "cid_to_gid_map"); |
1237 | 143 | fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short); |
1238 | 1.96M | for (z = 0; z < fontdesc->cid_to_gid_len; z++) |
1239 | 1.96M | fontdesc->cid_to_gid[z] = (data[z * 2] << 8) + data[z * 2 + 1]; |
1240 | 143 | } |
1241 | 2.06k | else if (cidtogidmap && !pdf_name_eq(ctx, PDF_NAME(Identity), cidtogidmap)) |
1242 | 19 | { |
1243 | 19 | fz_warn(ctx, "ignoring unknown CIDToGIDMap entry"); |
1244 | 19 | } |
1245 | | |
1246 | | /* if font is external, cidtogidmap should not be identity */ |
1247 | | /* so we map from cid to unicode and then map that through the (3 1) */ |
1248 | | /* unicode cmap to get a glyph id */ |
1249 | 2.04k | else if (fontdesc->font->flags.ft_substitute) |
1250 | 1.14k | { |
1251 | 1.14k | fz_ft_lock(ctx); |
1252 | 1.14k | fterr = FT_Select_Charmap(face, ft_encoding_unicode); |
1253 | 1.14k | fz_ft_unlock(ctx); |
1254 | 1.14k | if (fterr) |
1255 | 0 | fz_throw(ctx, FZ_ERROR_SYNTAX, "no unicode cmap when emulating CID font: %s", ft_error_string(fterr)); |
1256 | | |
1257 | 1.14k | if (!strcmp(collection, "Adobe-CNS1")) |
1258 | 42 | fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2"); |
1259 | 1.10k | else if (!strcmp(collection, "Adobe-GB1")) |
1260 | 26 | fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); |
1261 | 1.07k | else if (!strcmp(collection, "Adobe-Japan1")) |
1262 | 129 | fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2"); |
1263 | 950 | else if (!strcmp(collection, "Adobe-Japan2")) |
1264 | 1 | fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2"); |
1265 | 949 | else if (!strcmp(collection, "Adobe-Korea1")) |
1266 | 47 | fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2"); |
1267 | 1.14k | } |
1268 | | |
1269 | 2.20k | pdf_load_to_unicode(ctx, doc, fontdesc, NULL, collection, to_unicode); |
1270 | | |
1271 | | /* If we have an identity encoding, we're supposed to use the glyph ids directly. |
1272 | | * If we only have a substitute font, that won't work. |
1273 | | * Make a last ditch attempt by using |
1274 | | * the ToUnicode table if it exists to map via the substitute font's cmap. */ |
1275 | 2.20k | if (strstr(fontdesc->encoding->cmap_name, "Identity-") && fontdesc->font->flags.ft_substitute) |
1276 | 858 | { |
1277 | 858 | if (!fontdesc->to_ttf_cmap) |
1278 | 795 | { |
1279 | 795 | if (fontdesc->to_unicode) |
1280 | 486 | { |
1281 | | // Use ToUnicode from PDF file if possible. |
1282 | 486 | fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode); |
1283 | 486 | } |
1284 | 309 | else |
1285 | 309 | { |
1286 | | // Attempt a generic ToUnicode (default MacRoman ordering for TrueType) |
1287 | 309 | fontdesc->to_ttf_cmap = pdf_load_builtin_cmap(ctx, "TrueType-UCS2"); |
1288 | 309 | } |
1289 | 795 | } |
1290 | | |
1291 | 858 | if (fontdesc->to_ttf_cmap) |
1292 | 858 | { |
1293 | 858 | fz_warn(ctx, "non-embedded font using identity encoding: %s (mapping via %s)", basefont, fontdesc->to_ttf_cmap->cmap_name); |
1294 | 858 | if (!fontdesc->to_unicode) |
1295 | 309 | fontdesc->to_unicode = pdf_keep_cmap(ctx, fontdesc->to_ttf_cmap); |
1296 | 858 | } |
1297 | 0 | else |
1298 | 0 | fz_warn(ctx, "non-embedded font using identity encoding: %s", basefont); |
1299 | 858 | } |
1300 | | |
1301 | | /* Horizontal */ |
1302 | | |
1303 | 2.20k | dw = pdf_dict_get_int_default(ctx, dict, PDF_NAME(DW), 1000); |
1304 | 2.20k | pdf_set_default_hmtx(ctx, fontdesc, dw); |
1305 | | |
1306 | 2.20k | widths = pdf_dict_get(ctx, dict, PDF_NAME(W)); |
1307 | 2.20k | if (widths) |
1308 | 1.73k | { |
1309 | 1.73k | int c0, c1, w, n, m; |
1310 | | |
1311 | 1.73k | n = pdf_array_len(ctx, widths); |
1312 | 59.7k | for (i = 0; i < n; ) |
1313 | 57.9k | { |
1314 | 57.9k | c0 = pdf_array_get_int(ctx, widths, i); |
1315 | 57.9k | obj = pdf_array_get(ctx, widths, i + 1); |
1316 | 57.9k | if (pdf_is_array(ctx, obj)) |
1317 | 30.8k | { |
1318 | 30.8k | m = pdf_array_len(ctx, obj); |
1319 | 722k | for (k = 0; k < m; k++) |
1320 | 691k | { |
1321 | 691k | w = pdf_array_get_int(ctx, obj, k); |
1322 | 691k | pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w); |
1323 | 691k | } |
1324 | 30.8k | i += 2; |
1325 | 30.8k | } |
1326 | 27.1k | else |
1327 | 27.1k | { |
1328 | 27.1k | c1 = pdf_to_int(ctx, obj); |
1329 | 27.1k | w = pdf_array_get_int(ctx, widths, i + 2); |
1330 | 27.1k | pdf_add_hmtx(ctx, fontdesc, c0, c1, w); |
1331 | 27.1k | i += 3; |
1332 | 27.1k | } |
1333 | 57.9k | } |
1334 | 1.73k | } |
1335 | | |
1336 | 2.20k | pdf_end_hmtx(ctx, fontdesc); |
1337 | | |
1338 | | /* Vertical */ |
1339 | | |
1340 | 2.20k | if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1) |
1341 | 7 | { |
1342 | 7 | int dw2y = 880; |
1343 | 7 | int dw2w = -1000; |
1344 | | |
1345 | 7 | obj = pdf_dict_get(ctx, dict, PDF_NAME(DW2)); |
1346 | 7 | if (obj) |
1347 | 3 | { |
1348 | 3 | dw2y = pdf_array_get_int(ctx, obj, 0); |
1349 | 3 | dw2w = pdf_array_get_int(ctx, obj, 1); |
1350 | 3 | } |
1351 | | |
1352 | 7 | pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w); |
1353 | | |
1354 | 7 | widths = pdf_dict_get(ctx, dict, PDF_NAME(W2)); |
1355 | 7 | if (widths) |
1356 | 0 | { |
1357 | 0 | int c0, c1, w, x, y, n; |
1358 | |
|
1359 | 0 | n = pdf_array_len(ctx, widths); |
1360 | 0 | for (i = 0; i < n; ) |
1361 | 0 | { |
1362 | 0 | c0 = pdf_array_get_int(ctx, widths, i); |
1363 | 0 | obj = pdf_array_get(ctx, widths, i + 1); |
1364 | 0 | if (pdf_is_array(ctx, obj)) |
1365 | 0 | { |
1366 | 0 | int m = pdf_array_len(ctx, obj); |
1367 | 0 | for (k = 0; k * 3 < m; k ++) |
1368 | 0 | { |
1369 | 0 | w = pdf_array_get_int(ctx, obj, k * 3 + 0); |
1370 | 0 | x = pdf_array_get_int(ctx, obj, k * 3 + 1); |
1371 | 0 | y = pdf_array_get_int(ctx, obj, k * 3 + 2); |
1372 | 0 | pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w); |
1373 | 0 | } |
1374 | 0 | i += 2; |
1375 | 0 | } |
1376 | 0 | else |
1377 | 0 | { |
1378 | 0 | c1 = pdf_to_int(ctx, obj); |
1379 | 0 | w = pdf_array_get_int(ctx, widths, i + 2); |
1380 | 0 | x = pdf_array_get_int(ctx, widths, i + 3); |
1381 | 0 | y = pdf_array_get_int(ctx, widths, i + 4); |
1382 | 0 | pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w); |
1383 | 0 | i += 5; |
1384 | 0 | } |
1385 | 0 | } |
1386 | 0 | } |
1387 | | |
1388 | 7 | pdf_end_vmtx(ctx, fontdesc); |
1389 | 7 | } |
1390 | 2.20k | } |
1391 | 4.93k | fz_always(ctx) |
1392 | 2.46k | fz_drop_buffer(ctx, buf); |
1393 | 2.46k | fz_catch(ctx) |
1394 | 474 | { |
1395 | 474 | pdf_drop_font(ctx, fontdesc); |
1396 | 474 | fz_rethrow(ctx); |
1397 | 474 | } |
1398 | | |
1399 | 1.73k | return fontdesc; |
1400 | 2.20k | } |
1401 | | |
1402 | | static pdf_font_desc * |
1403 | | pdf_load_type0_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict) |
1404 | 3.76k | { |
1405 | 3.76k | pdf_obj *dfonts; |
1406 | 3.76k | pdf_obj *dfont; |
1407 | 3.76k | pdf_obj *subtype; |
1408 | 3.76k | pdf_obj *encoding; |
1409 | 3.76k | pdf_obj *to_unicode; |
1410 | | |
1411 | 3.76k | dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts)); |
1412 | 3.76k | if (!dfonts) |
1413 | 76 | fz_throw(ctx, FZ_ERROR_SYNTAX, "cid font is missing descendant fonts"); |
1414 | | |
1415 | 3.68k | dfont = pdf_array_get(ctx, dfonts, 0); |
1416 | | |
1417 | 3.68k | subtype = pdf_dict_get(ctx, dfont, PDF_NAME(Subtype)); |
1418 | 3.68k | encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding)); |
1419 | 3.68k | to_unicode = pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode)); |
1420 | | |
1421 | 3.68k | if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0))) |
1422 | 729 | return load_cid_font(ctx, doc, dfont, encoding, to_unicode); |
1423 | 2.96k | if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2))) |
1424 | 1.73k | return load_cid_font(ctx, doc, dfont, encoding, to_unicode); |
1425 | 1.22k | fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown cid font type"); |
1426 | 2.96k | } |
1427 | | |
1428 | | /* |
1429 | | * FontDescriptor |
1430 | | */ |
1431 | | |
1432 | | static void |
1433 | | pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict, |
1434 | | const char *collection, const char *basefont, int iscidfont) |
1435 | 8.45k | { |
1436 | 8.45k | pdf_obj *obj1, *obj2, *obj3, *obj; |
1437 | 8.45k | const char *fontname; |
1438 | 8.45k | FT_Face face; |
1439 | | |
1440 | | /* Prefer BaseFont; don't bother with FontName */ |
1441 | 8.45k | fontname = basefont; |
1442 | | |
1443 | 8.45k | fontdesc->flags = pdf_dict_get_int(ctx, dict, PDF_NAME(Flags)); |
1444 | 8.45k | fontdesc->italic_angle = pdf_dict_get_real(ctx, dict, PDF_NAME(ItalicAngle)); |
1445 | 8.45k | fontdesc->ascent = pdf_dict_get_real(ctx, dict, PDF_NAME(Ascent)); |
1446 | 8.45k | fontdesc->descent = pdf_dict_get_real(ctx, dict, PDF_NAME(Descent)); |
1447 | 8.45k | fontdesc->cap_height = pdf_dict_get_real(ctx, dict, PDF_NAME(CapHeight)); |
1448 | 8.45k | fontdesc->x_height = pdf_dict_get_real(ctx, dict, PDF_NAME(XHeight)); |
1449 | 8.45k | fontdesc->missing_width = pdf_dict_get_real(ctx, dict, PDF_NAME(MissingWidth)); |
1450 | | |
1451 | 8.45k | obj1 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile)); |
1452 | 8.45k | obj2 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile2)); |
1453 | 8.45k | obj3 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile3)); |
1454 | 8.45k | obj = obj1 ? obj1 : obj2 ? obj2 : obj3; |
1455 | | |
1456 | 8.45k | if (pdf_is_indirect(ctx, obj)) |
1457 | 7.26k | { |
1458 | 14.5k | fz_try(ctx) |
1459 | 14.5k | { |
1460 | 7.26k | pdf_load_embedded_font(ctx, doc, fontdesc, fontname, obj); |
1461 | 7.26k | } |
1462 | 14.5k | fz_catch(ctx) |
1463 | 2.74k | { |
1464 | 2.74k | fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); |
1465 | 2.74k | fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); |
1466 | 2.74k | fz_report_error(ctx); |
1467 | 2.74k | fz_warn(ctx, "ignored error when loading embedded font; attempting to load system font"); |
1468 | 2.74k | if (!iscidfont && fontname != pdf_clean_font_name(fontname)) |
1469 | 199 | pdf_load_builtin_font(ctx, fontdesc, fontname, 1); |
1470 | 2.54k | else |
1471 | 2.54k | pdf_load_system_font(ctx, fontdesc, fontname, collection); |
1472 | 2.74k | } |
1473 | 7.26k | } |
1474 | 1.19k | else |
1475 | 1.19k | { |
1476 | 1.19k | if (!iscidfont && fontname != pdf_clean_font_name(fontname)) |
1477 | 199 | pdf_load_builtin_font(ctx, fontdesc, fontname, 1); |
1478 | 994 | else |
1479 | 994 | pdf_load_system_font(ctx, fontdesc, fontname, collection); |
1480 | 1.19k | } |
1481 | | |
1482 | | /* Check for DynaLab fonts that must use hinting */ |
1483 | 8.45k | face = fontdesc->font->ft_face; |
1484 | 8.45k | if (ft_kind(ctx, face) == TRUETYPE) |
1485 | 2.69k | { |
1486 | | /* FreeType's own 'tricky' font detection needs a bit of help */ |
1487 | 2.69k | if (is_dynalab(fontdesc->font->name)) |
1488 | 469 | face->face_flags |= FT_FACE_FLAG_TRICKY; |
1489 | | |
1490 | 2.69k | if (fontdesc->ascent == 0.0f) |
1491 | 269 | fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM; |
1492 | | |
1493 | 2.69k | if (fontdesc->descent == 0.0f) |
1494 | 587 | fontdesc->descent = 1000.0f * face->descender / face->units_per_EM; |
1495 | 2.69k | } |
1496 | 8.45k | } |
1497 | | |
1498 | | static void |
1499 | | pdf_make_width_table(fz_context *ctx, pdf_font_desc *fontdesc) |
1500 | 9.88k | { |
1501 | 9.88k | fz_font *font = fontdesc->font; |
1502 | 9.88k | int i, k, n, cid, gid; |
1503 | | |
1504 | 9.88k | n = 0; |
1505 | 1.78M | for (i = 0; i < fontdesc->hmtx_len; i++) |
1506 | 1.77M | { |
1507 | 19.6M | for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++) |
1508 | 17.8M | { |
1509 | 17.8M | cid = pdf_lookup_cmap(fontdesc->encoding, k); |
1510 | 17.8M | gid = pdf_font_cid_to_gid(ctx, fontdesc, cid); |
1511 | 17.8M | if (gid > n) |
1512 | 1.03M | n = gid; |
1513 | 17.8M | } |
1514 | 1.77M | } |
1515 | | |
1516 | 9.88k | font->width_count = n + 1; |
1517 | 9.88k | font->width_table = Memento_label(fz_malloc_array(ctx, font->width_count, short), "font_widths"); |
1518 | 9.88k | fontdesc->size += font->width_count * sizeof(short); |
1519 | | |
1520 | 9.88k | font->width_default = fontdesc->dhmtx.w; |
1521 | 4.08M | for (i = 0; i < font->width_count; i++) |
1522 | 4.07M | font->width_table[i] = -1; |
1523 | | |
1524 | 1.78M | for (i = 0; i < fontdesc->hmtx_len; i++) |
1525 | 1.77M | { |
1526 | 19.6M | for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++) |
1527 | 17.8M | { |
1528 | 17.8M | cid = pdf_lookup_cmap(fontdesc->encoding, k); |
1529 | 17.8M | gid = pdf_font_cid_to_gid(ctx, fontdesc, cid); |
1530 | 17.8M | if (gid >= 0 && gid < font->width_count) |
1531 | 16.8M | font->width_table[gid] = fz_maxi(fontdesc->hmtx[i].w, font->width_table[gid]); |
1532 | 17.8M | } |
1533 | 1.77M | } |
1534 | | |
1535 | 4.08M | for (i = 0; i < font->width_count; i++) |
1536 | 4.07M | if (font->width_table[i] == -1) |
1537 | 2.57M | font->width_table[i] = font->width_default; |
1538 | 9.88k | } |
1539 | | |
1540 | | pdf_font_desc * |
1541 | | pdf_load_font(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict) |
1542 | 91.6k | { |
1543 | 91.6k | pdf_obj *subtype; |
1544 | 91.6k | pdf_obj *dfonts; |
1545 | 91.6k | pdf_obj *charprocs; |
1546 | 91.6k | pdf_font_desc *fontdesc = NULL; |
1547 | 91.6k | int type3 = 0; |
1548 | | |
1549 | 91.6k | if ((fontdesc = pdf_find_item(ctx, pdf_drop_font_imp, dict)) != NULL) |
1550 | 79.9k | { |
1551 | 79.9k | if (fontdesc->t3loading) |
1552 | 39 | { |
1553 | 39 | pdf_drop_font(ctx, fontdesc); |
1554 | 39 | fz_throw(ctx, FZ_ERROR_SYNTAX, "recursive type3 font"); |
1555 | 39 | } |
1556 | 79.9k | return fontdesc; |
1557 | 79.9k | } |
1558 | | |
1559 | 11.6k | subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype)); |
1560 | 11.6k | dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts)); |
1561 | 11.6k | charprocs = pdf_dict_get(ctx, dict, PDF_NAME(CharProcs)); |
1562 | | |
1563 | 11.6k | if (pdf_name_eq(ctx, subtype, PDF_NAME(Type0))) |
1564 | 3.51k | fontdesc = pdf_load_type0_font(ctx, doc, dict); |
1565 | 8.16k | else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1))) |
1566 | 4.01k | fontdesc = pdf_load_simple_font(ctx, doc, dict); |
1567 | 4.15k | else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1))) |
1568 | 179 | fontdesc = pdf_load_simple_font(ctx, doc, dict); |
1569 | 3.97k | else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType))) |
1570 | 2.74k | fontdesc = pdf_load_simple_font(ctx, doc, dict); |
1571 | 1.23k | else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type3))) |
1572 | 434 | { |
1573 | 434 | fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict); |
1574 | 434 | type3 = 1; |
1575 | 434 | } |
1576 | 797 | else if (charprocs) |
1577 | 7 | { |
1578 | 7 | fz_warn(ctx, "unknown font format, guessing type3."); |
1579 | 7 | fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict); |
1580 | 7 | type3 = 1; |
1581 | 7 | } |
1582 | 790 | else if (dfonts) |
1583 | 253 | { |
1584 | 253 | fz_warn(ctx, "unknown font format, guessing type0."); |
1585 | 253 | fontdesc = pdf_load_type0_font(ctx, doc, dict); |
1586 | 253 | } |
1587 | 537 | else |
1588 | 537 | { |
1589 | 537 | fz_warn(ctx, "unknown font format, guessing type1 or truetype."); |
1590 | 537 | fontdesc = pdf_load_simple_font(ctx, doc, dict); |
1591 | 537 | } |
1592 | | |
1593 | 19.7k | fz_try(ctx) |
1594 | 19.7k | { |
1595 | | /* Create glyph width table for stretching substitute fonts and text extraction. */ |
1596 | 9.88k | pdf_make_width_table(ctx, fontdesc); |
1597 | | |
1598 | 9.88k | pdf_store_item(ctx, dict, fontdesc, fontdesc->size); |
1599 | | |
1600 | | /* Load CharProcs */ |
1601 | 9.88k | if (type3) |
1602 | 425 | { |
1603 | 425 | fontdesc->t3loading = 1; |
1604 | 850 | fz_try(ctx) |
1605 | 850 | pdf_load_type3_glyphs(ctx, doc, fontdesc); |
1606 | 850 | fz_always(ctx) |
1607 | 425 | fontdesc->t3loading = 0; |
1608 | 425 | fz_catch(ctx) |
1609 | 1 | { |
1610 | 1 | pdf_remove_item(ctx, fontdesc->storable.drop, dict); |
1611 | 1 | fz_rethrow(ctx); |
1612 | 1 | } |
1613 | 425 | } |
1614 | 9.88k | } |
1615 | 19.7k | fz_catch(ctx) |
1616 | 1 | { |
1617 | 1 | pdf_drop_font(ctx, fontdesc); |
1618 | 1 | fz_rethrow(ctx); |
1619 | 1 | } |
1620 | | |
1621 | 11.6k | return fontdesc; |
1622 | 11.6k | } |
1623 | | |
1624 | | void |
1625 | | pdf_print_font(fz_context *ctx, fz_output *out, pdf_font_desc *fontdesc) |
1626 | 0 | { |
1627 | 0 | int i; |
1628 | |
|
1629 | 0 | fz_write_printf(ctx, out, "fontdesc {\n"); |
1630 | |
|
1631 | 0 | if (fontdesc->font->ft_face) |
1632 | 0 | fz_write_printf(ctx, out, "\tfreetype font\n"); |
1633 | 0 | if (fontdesc->font->t3procs) |
1634 | 0 | fz_write_printf(ctx, out, "\ttype3 font\n"); |
1635 | |
|
1636 | 0 | fz_write_printf(ctx, out, "\twmode %d\n", fontdesc->wmode); |
1637 | 0 | fz_write_printf(ctx, out, "\tDW %d\n", fontdesc->dhmtx.w); |
1638 | |
|
1639 | 0 | fz_write_printf(ctx, out, "\tW {\n"); |
1640 | 0 | for (i = 0; i < fontdesc->hmtx_len; i++) |
1641 | 0 | fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d\n", |
1642 | 0 | fontdesc->hmtx[i].lo, fontdesc->hmtx[i].hi, fontdesc->hmtx[i].w); |
1643 | 0 | fz_write_printf(ctx, out, "\t}\n"); |
1644 | |
|
1645 | 0 | if (fontdesc->wmode) |
1646 | 0 | { |
1647 | 0 | fz_write_printf(ctx, out, "\tDW2 [%d %d]\n", fontdesc->dvmtx.y, fontdesc->dvmtx.w); |
1648 | 0 | fz_write_printf(ctx, out, "\tW2 {\n"); |
1649 | 0 | for (i = 0; i < fontdesc->vmtx_len; i++) |
1650 | 0 | fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d %d %d\n", fontdesc->vmtx[i].lo, fontdesc->vmtx[i].hi, |
1651 | 0 | fontdesc->vmtx[i].x, fontdesc->vmtx[i].y, fontdesc->vmtx[i].w); |
1652 | 0 | fz_write_printf(ctx, out, "\t}\n"); |
1653 | 0 | } |
1654 | 0 | } |