Coverage Report

Created: 2024-05-20 06:23

/src/mupdf/source/pdf/pdf-font.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2022 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "mupdf/pdf.h"
25
26
#include <assert.h>
27
28
#include <ft2build.h>
29
#include FT_FREETYPE_H
30
#include FT_ADVANCES_H
31
#ifdef FT_FONT_FORMATS_H
32
#include FT_FONT_FORMATS_H
33
#else
34
#include FT_XFREE86_H
35
#endif
36
#include FT_TRUETYPE_TABLES_H
37
38
#ifndef FT_SFNT_HEAD
39
#define FT_SFNT_HEAD ft_sfnt_head
40
#endif
41
42
void
43
pdf_load_encoding(const char **estrings, const char *encoding)
44
6.60k
{
45
6.60k
  const char * const *bstrings = NULL;
46
6.60k
  int i;
47
48
6.60k
  if (!strcmp(encoding, "StandardEncoding"))
49
3.02k
    bstrings = fz_glyph_name_from_adobe_standard;
50
6.60k
  if (!strcmp(encoding, "MacRomanEncoding"))
51
618
    bstrings = fz_glyph_name_from_mac_roman;
52
6.60k
  if (!strcmp(encoding, "MacExpertEncoding"))
53
0
    bstrings = fz_glyph_name_from_mac_expert;
54
6.60k
  if (!strcmp(encoding, "WinAnsiEncoding"))
55
2.69k
    bstrings = fz_glyph_name_from_win_ansi;
56
57
6.60k
  if (bstrings)
58
1.62M
    for (i = 0; i < 256; i++)
59
1.62M
      estrings[i] = bstrings[i];
60
6.60k
}
61
62
/* Forward declaration: referenced by the font loaders before its definition appears. */
static void pdf_load_font_descriptor(
  fz_context *ctx,
  pdf_document *doc,
  pdf_font_desc *fontdesc,
  pdf_obj *dict,
  const char *collection,
  const char *basefont,
  int iscidfont);
64
65
/*
 * Alias table for the base fonts. Each row is NULL-terminated; element 0 is
 * the canonical name and the remaining elements are alternative spellings
 * that pdf_clean_font_name() maps back to element 0.
 */
static const char *base_font_names[][10] =
{
  /* Courier family */
  { "Courier",
    "CourierNew", "CourierNewPSMT", NULL },
  { "Courier-Bold",
    "CourierNew,Bold", "Courier,Bold", "CourierNewPS-BoldMT",
    "CourierNew-Bold", NULL },
  { "Courier-Oblique",
    "CourierNew,Italic", "Courier,Italic", "CourierNewPS-ItalicMT",
    "CourierNew-Italic", NULL },
  { "Courier-BoldOblique",
    "CourierNew,BoldItalic", "Courier,BoldItalic", "CourierNewPS-BoldItalicMT",
    "CourierNew-BoldItalic", NULL },

  /* Helvetica family */
  { "Helvetica",
    "ArialMT", "Arial", NULL },
  { "Helvetica-Bold",
    "Arial-BoldMT", "Arial,Bold", "Arial-Bold",
    "Helvetica,Bold", NULL },
  { "Helvetica-Oblique",
    "Arial-ItalicMT", "Arial,Italic", "Arial-Italic",
    "Helvetica,Italic", "Helvetica-Italic", NULL },
  { "Helvetica-BoldOblique",
    "Arial-BoldItalicMT", "Arial,BoldItalic", "Arial-BoldItalic",
    "Helvetica,BoldItalic", "Helvetica-BoldItalic", NULL },

  /* Times family */
  { "Times-Roman",
    "TimesNewRomanPSMT", "TimesNewRoman", "TimesNewRomanPS", NULL },
  { "Times-Bold",
    "TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold", "TimesNewRomanPS-Bold",
    "TimesNewRoman-Bold", NULL },
  { "Times-Italic",
    "TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic", "TimesNewRomanPS-Italic",
    "TimesNewRoman-Italic", NULL },
  { "Times-BoldItalic",
    "TimesNewRomanPS-BoldItalicMT", "TimesNewRoman,BoldItalic", "TimesNewRomanPS-BoldItalic",
    "TimesNewRoman-BoldItalic", NULL },

  /* Symbol fonts */
  { "Symbol",
    "Symbol,Italic", "Symbol,Bold", "Symbol,BoldItalic",
    "SymbolMT", "SymbolMT,Italic", "SymbolMT,Bold", "SymbolMT,BoldItalic", NULL },
  { "ZapfDingbats", NULL }
};
95
96
const unsigned char *
97
pdf_lookup_substitute_font(fz_context *ctx, int mono, int serif, int bold, int italic, int *len)
98
3.24k
{
99
3.24k
  if (mono) {
100
88
    if (bold) {
101
12
      if (italic) return fz_lookup_base14_font(ctx, "Courier-BoldOblique", len);
102
12
      else return fz_lookup_base14_font(ctx, "Courier-Bold", len);
103
76
    } else {
104
76
      if (italic) return fz_lookup_base14_font(ctx, "Courier-Oblique", len);
105
73
      else return fz_lookup_base14_font(ctx, "Courier", len);
106
76
    }
107
3.16k
  } else if (serif) {
108
360
    if (bold) {
109
105
      if (italic) return fz_lookup_base14_font(ctx, "Times-BoldItalic", len);
110
85
      else return fz_lookup_base14_font(ctx, "Times-Bold", len);
111
255
    } else {
112
255
      if (italic) return fz_lookup_base14_font(ctx, "Times-Italic", len);
113
200
      else return fz_lookup_base14_font(ctx, "Times-Roman", len);
114
255
    }
115
2.80k
  } else {
116
2.80k
    if (bold) {
117
393
      if (italic) return fz_lookup_base14_font(ctx, "Helvetica-BoldOblique", len);
118
388
      else return fz_lookup_base14_font(ctx, "Helvetica-Bold", len);
119
2.40k
    } else {
120
2.40k
      if (italic) return fz_lookup_base14_font(ctx, "Helvetica-Oblique", len);
121
2.34k
      else return fz_lookup_base14_font(ctx, "Helvetica", len);
122
2.40k
    }
123
2.80k
  }
124
3.24k
}
125
126
/*
 * Return 1 if name looks like a DynaLab font name, 0 otherwise.
 *
 * A name matches when it contains "HuaTian" or "MingLi", or when it either
 * starts with "DF"/"DLC" or contains "+DF"/"+DLC" anywhere.
 *
 * The name is only read, so it is taken as const. The prefix tests use
 * strncmp so that only the first few characters are inspected instead of
 * searching the whole string.
 */
static int is_dynalab(const char *name)
{
  if (strstr(name, "HuaTian"))
    return 1;
  if (strstr(name, "MingLi"))
    return 1;
  if (!strncmp(name, "DF", 2) || strstr(name, "+DF"))
    return 1;
  if (!strncmp(name, "DLC", 3) || strstr(name, "+DLC"))
    return 1;
  return 0;
}
138
139
/*
 * Compare two strings while skipping every ' ' character in both.
 *
 * Returns 0 when the strings are equal once spaces are ignored, and 1
 * otherwise. Unlike strcmp() the result carries no ordering information.
 */
static int strcmp_ignore_space(const char *a, const char *b)
{
  for (;;)
  {
    while (*a == ' ')
      a++;
    while (*b == ' ')
      b++;
    if (*a != *b)
      return 1;
    /* The characters are equal here, so reaching the terminator in one
     * string means both strings ended together. */
    if (*a == 0)
      return 0;
    a++;
    b++;
  }
}
157
158
const char *pdf_clean_font_name(const char *fontname)
159
7.27k
{
160
7.27k
  int i, k;
161
90.3k
  for (i = 0; i < (int)nelem(base_font_names); i++)
162
477k
    for (k = 0; base_font_names[i][k]; k++)
163
393k
      if (!strcmp_ignore_space(base_font_names[i][k], fontname))
164
2.49k
        return base_font_names[i][0];
165
4.78k
  return fontname;
166
7.27k
}
167
168
/*
169
 * FreeType and Rendering glue
170
 */
171
172
enum { UNKNOWN, TYPE1, TRUETYPE };
173
174
static int ft_kind(fz_context *ctx, FT_Face face)
175
18.2k
{
176
18.2k
  const char *kind;
177
18.2k
  fz_ft_lock(ctx);
178
18.2k
#ifdef FT_FONT_FORMATS_H
179
18.2k
  kind = FT_Get_Font_Format(face);
180
#else
181
  kind = FT_Get_X11_Font_Format(face);
182
#endif
183
18.2k
  fz_ft_unlock(ctx);
184
18.2k
  if (!strcmp(kind, "TrueType")) return TRUETYPE;
185
13.5k
  if (!strcmp(kind, "Type 1")) return TYPE1;
186
12.1k
  if (!strcmp(kind, "CFF")) return TYPE1;
187
0
  if (!strcmp(kind, "CID Type 1")) return TYPE1;
188
0
  return UNKNOWN;
189
0
}
190
191
/*
 * Map a character code to its vertical presentation form.
 * Codes without a mapping are returned unchanged.
 */
static int ft_vertical_presentation_form(int ucs)
{
  switch (ucs)
  {
  /* ASCII punctuation and the matching fullwidth forms */
  case 0x0021: case 0xFF01: return 0xFE15; /* ! */
  case 0x0028: case 0xFF08: return 0xFE35; /* ( */
  case 0x0029: case 0xFF09: return 0xFE36; /* ) */
  case 0x002C: case 0xFF0C: return 0xFE10; /* , */
  case 0x003A: case 0xFF1A: return 0xFE13; /* : */
  case 0x003B: case 0xFF1B: return 0xFE14; /* ; */
  case 0x003F: case 0xFF1F: return 0xFE16; /* ? */
  case 0x005B: case 0xFF3B: return 0xFE47; /* [ */
  case 0x005D: case 0xFF3D: return 0xFE48; /* ] */
  case 0x005F: case 0xFF3F: return 0xFE33; /* _ */
  case 0x007B: case 0xFF5B: return 0xFE37; /* { */
  case 0x007D: case 0xFF5D: return 0xFE38; /* } */

  /* EM DASH, KATAKANA-HIRAGANA PROLONGED SOUND MARK, FULLWIDTH HYPHEN-MINUS */
  case 0x2014: case 0x30FC: case 0xFF0D: return 0xFE31;

  case 0x2013: return 0xFE32; /* EN DASH */
  case 0x2025: return 0xFE30; /* TWO DOT LEADER */
  case 0x2026: return 0xFE19; /* HORIZONTAL ELLIPSIS */
  case 0x3001: return 0xFE11; /* IDEOGRAPHIC COMMA */
  case 0x3002: return 0xFE12; /* IDEOGRAPHIC FULL STOP */
  case 0x3008: return 0xFE3F; /* OPENING ANGLE BRACKET */
  case 0x3009: return 0xFE40; /* CLOSING ANGLE BRACKET */
  case 0x300A: return 0xFE3D; /* LEFT DOUBLE ANGLE BRACKET */
  case 0x300B: return 0xFE3E; /* RIGHT DOUBLE ANGLE BRACKET */
  case 0x300C: return 0xFE41; /* LEFT CORNER BRACKET */
  case 0x300D: return 0xFE42; /* RIGHT CORNER BRACKET */
  case 0x300E: return 0xFE43; /* LEFT WHITE CORNER BRACKET */
  case 0x300F: return 0xFE44; /* RIGHT WHITE CORNER BRACKET */
  case 0x3010: return 0xFE3B; /* LEFT BLACK LENTICULAR BRACKET */
  case 0x3011: return 0xFE3C; /* RIGHT BLACK LENTICULAR BRACKET */
  case 0x3014: return 0xFE39; /* LEFT TORTOISE SHELL BRACKET */
  case 0x3015: return 0xFE3A; /* RIGHT TORTOISE SHELL BRACKET */
  case 0x3016: return 0xFE17; /* LEFT WHITE LENTICULAR BRACKET */
  case 0x3017: return 0xFE18; /* RIGHT WHITE LENTICULAR BRACKET */

  default: return ucs;
  }
}

/*
 * Translate a CID into a glyph index for the font's FreeType face.
 *
 * With a to_ttf_cmap, the CID is first mapped through that cmap, then (for
 * substitute fonts in vertical writing mode) replaced by its vertical
 * presentation form, and finally looked up with ft_char_index().
 * Without one, an in-range CID is looked up in the cid_to_gid table if
 * present; otherwise the CID is returned as-is.
 */
static int ft_cid_to_gid(pdf_font_desc *fontdesc, int cid)
{
  if (!fontdesc->to_ttf_cmap)
  {
    if (fontdesc->cid_to_gid && (size_t)cid < fontdesc->cid_to_gid_len && cid >= 0)
      return fontdesc->cid_to_gid[cid];
    return cid;
  }

  cid = pdf_lookup_cmap(fontdesc->to_ttf_cmap, cid);

  /* vertical presentation forms */
  if (fontdesc->font->flags.ft_substitute && fontdesc->wmode)
    cid = ft_vertical_presentation_form(cid);

  return ft_char_index(fontdesc->font->ft_face, cid);
}
261
262
int
263
pdf_font_cid_to_gid(fz_context *ctx, pdf_font_desc *fontdesc, int cid)
264
40.0M
{
265
40.0M
  if (fontdesc->font->ft_face)
266
40.0M
  {
267
40.0M
    int gid;
268
40.0M
    fz_ft_lock(ctx);
269
40.0M
    gid = ft_cid_to_gid(fontdesc, cid);
270
40.0M
    fz_ft_unlock(ctx);
271
40.0M
    return gid;
272
40.0M
  }
273
66.6k
  return cid;
274
40.0M
}
275
276
/*
 * Return the advance of the glyph for cid, scaled to 1000 units per em.
 *
 * The advance is requested unscaled and unhinted. FreeType errors other than
 * FT_Err_Invalid_Argument produce a warning; in every error case the advance
 * keeps whatever FT_Get_Advance left in it (it starts out as 0). A face
 * reporting 0 units per em is treated as having 2048.
 */
static int ft_width(fz_context *ctx, pdf_font_desc *fontdesc, int cid)
{
  const int load_flags = FT_LOAD_NO_SCALE | FT_LOAD_NO_HINTING | FT_LOAD_NO_BITMAP | FT_LOAD_IGNORE_TRANSFORM;
  int glyph = ft_cid_to_gid(fontdesc, cid);
  FT_Face ft = fontdesc->font->ft_face;
  FT_Fixed advance = 0;
  FT_UShort em;
  int err;

  err = FT_Get_Advance(ft, glyph, load_flags, &advance);
  if (err != 0 && err != FT_Err_Invalid_Argument)
    fz_warn(ctx, "FT_Get_Advance(%d): %s", glyph, ft_error_string(err));

  em = ft->units_per_EM ? ft->units_per_EM : 2048;

  return advance * 1000 / em;
}
295
296
/*
 * Glyph names with fixed character codes; lookup_mre_code() consults this
 * table before falling back to fz_glyph_name_from_mac_roman.
 */
static const struct { int code; const char *name; } mre_diff_table[] =
{
  { .code = 173, .name = "notequal" },
  { .code = 176, .name = "infinity" },
  { .code = 178, .name = "lessequal" },
  { .code = 179, .name = "greaterequal" },
  { .code = 182, .name = "partialdiff" },
  { .code = 183, .name = "summation" },
  { .code = 184, .name = "product" },
  { .code = 185, .name = "pi" },
  { .code = 186, .name = "integral" },
  { .code = 189, .name = "Omega" },
  { .code = 195, .name = "radical" },
  { .code = 197, .name = "approxequal" },
  { .code = 198, .name = "Delta" },
  { .code = 215, .name = "lozenge" },
  { .code = 219, .name = "Euro" },
  { .code = 240, .name = "apple" },
};
315
316
static int lookup_mre_code(const char *name)
317
33.2k
{
318
33.2k
  int i;
319
565k
  for (i = 0; i < (int)nelem(mre_diff_table); ++i)
320
532k
    if (!strcmp(name, mre_diff_table[i].name))
321
27
      return mre_diff_table[i].code;
322
4.70M
  for (i = 0; i < 256; i++)
323
4.69M
    if (fz_glyph_name_from_mac_roman[i] && !strcmp(name, fz_glyph_name_from_mac_roman[i]))
324
32.7k
      return i;
325
486
  return -1;
326
33.2k
}
327
328
/*
 * Resolve a glyph name to a glyph index in face, trying in order:
 *   1. the strict unicode value of the name, via ft_char_index();
 *   2. the glyph name itself, via ft_name_index();
 *   3. the fuzzy unicode value of the name, via ft_char_index().
 * Steps 1 and 2 only succeed with a glyph index > 0. Step 3 returns whatever
 * ft_char_index() reports. Returns 0 if the name has no unicode value at all.
 */
static int ft_find_glyph_by_unicode_name(FT_Face face, const char *name)
{
  int ucs;
  int gid;

  /* Prefer exact unicode match if available. */
  ucs = fz_unicode_from_glyph_name_strict(name);
  gid = (ucs > 0) ? ft_char_index(face, ucs) : 0;
  if (gid > 0)
    return gid;

  /* Fall back to font glyph name if we can. */
  gid = ft_name_index(face, name);
  if (gid > 0)
    return gid;

  /* Fuzzy unicode match as last attempt; 0 means failure. */
  ucs = fz_unicode_from_glyph_name(name);
  return (ucs > 0) ? ft_char_index(face, ucs) : 0;
}
354
355
/*
356
 * Load font files.
357
 */
358
359
static void
360
pdf_load_builtin_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int has_descriptor)
361
3.89k
{
362
3.89k
  FT_Face face;
363
3.89k
  const char *clean_name = pdf_clean_font_name(fontname);
364
3.89k
  if (clean_name == fontname)
365
2.45k
    clean_name = "Times-Roman";
366
367
3.89k
  fontdesc->font = fz_load_system_font(ctx, fontname, 0, 0, !has_descriptor);
368
3.89k
  if (!fontdesc->font)
369
3.89k
  {
370
3.89k
    const unsigned char *data;
371
3.89k
    int len;
372
373
3.89k
    data = fz_lookup_base14_font(ctx, clean_name, &len);
374
3.89k
    if (!data)
375
0
      fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin font: '%s'", fontname);
376
377
3.89k
    fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1);
378
3.89k
    fontdesc->font->flags.is_serif = !!strstr(clean_name, "Times");
379
3.89k
  }
380
381
3.89k
  if (!strcmp(clean_name, "Symbol") || !strcmp(clean_name, "ZapfDingbats"))
382
223
    fontdesc->flags |= PDF_FD_SYMBOLIC;
383
384
3.89k
  face = fontdesc->font->ft_face;
385
3.89k
  fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM;
386
3.89k
  fontdesc->descent = 1000.0f * face->descender / face->units_per_EM;
387
3.89k
}
388
389
static void
390
pdf_load_substitute_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int mono, int serif, int bold, int italic)
391
3.24k
{
392
3.24k
  fontdesc->font = fz_load_system_font(ctx, fontname, bold, italic, 0);
393
3.24k
  if (!fontdesc->font)
394
3.24k
  {
395
3.24k
    const unsigned char *data;
396
3.24k
    int len;
397
398
3.24k
    data = pdf_lookup_substitute_font(ctx, mono, serif, bold, italic, &len);
399
3.24k
    if (!data)
400
0
      fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find substitute font");
401
402
3.24k
    fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1);
403
3.24k
    fontdesc->font->flags.fake_bold = bold && !fontdesc->font->flags.is_bold;
404
3.24k
    fontdesc->font->flags.fake_italic = italic && !fontdesc->font->flags.is_italic;
405
406
3.24k
    fontdesc->font->flags.is_mono = mono;
407
3.24k
    fontdesc->font->flags.is_serif = serif;
408
3.24k
    fontdesc->font->flags.is_bold = bold;
409
3.24k
    fontdesc->font->flags.is_italic = italic;
410
3.24k
  }
411
412
3.24k
  fontdesc->font->flags.ft_substitute = 1;
413
3.24k
  fontdesc->font->flags.ft_stretch = 1;
414
3.24k
}
415
416
static void
417
pdf_load_substitute_cjk_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int ros, int serif)
418
291
{
419
291
  fontdesc->font = fz_load_system_cjk_font(ctx, fontname, ros, serif);
420
291
  if (!fontdesc->font)
421
291
  {
422
291
    const unsigned char *data;
423
291
    int size;
424
291
    int subfont;
425
426
291
    data = fz_lookup_cjk_font(ctx, ros, &size, &subfont);
427
291
    if (!data)
428
0
      fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin CJK font");
429
430
    /* A glyph bbox cache is too big for CJK fonts. */
431
291
    fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, size, subfont, 0);
432
291
  }
433
434
291
  fontdesc->font->flags.ft_substitute = 1;
435
291
  fontdesc->font->flags.ft_stretch = 0;
436
291
  fontdesc->font->flags.cjk = 1;
437
291
  fontdesc->font->flags.cjk_lang = ros;
438
291
}
439
440
static struct { int ros, serif; const char *name; } known_cjk_fonts[] = {
441
  { FZ_ADOBE_GB, 1, "SimFang" },
442
  { FZ_ADOBE_GB, 0, "SimHei" },
443
  { FZ_ADOBE_GB, 1, "SimKai" },
444
  { FZ_ADOBE_GB, 1, "SimLi" },
445
  { FZ_ADOBE_GB, 1, "SimSun" },
446
  { FZ_ADOBE_GB, 1, "Song" },
447
448
  { FZ_ADOBE_CNS, 1, "MingLiU" },
449
450
  { FZ_ADOBE_JAPAN, 0, "Gothic" },
451
  { FZ_ADOBE_JAPAN, 1, "Mincho" },
452
453
  { FZ_ADOBE_KOREA, 1, "Batang" },
454
  { FZ_ADOBE_KOREA, 0, "Gulim" },
455
  { FZ_ADOBE_KOREA, 0, "Dotum" },
456
};
457
458
/* Return 1 if ref occurs anywhere inside s, 0 otherwise. */
static int match_font_name(const char *s, const char *ref)
{
  const char *hit = strstr(s, ref);

  return hit != NULL;
}
462
463
static void
464
pdf_load_system_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, const char *collection)
465
3.53k
{
466
3.53k
  int bold = 0;
467
3.53k
  int italic = 0;
468
3.53k
  int serif = 0;
469
3.53k
  int mono = 0;
470
471
3.53k
  if (strstr(fontname, "Bold"))
472
548
    bold = 1;
473
3.53k
  if (strstr(fontname, "Italic"))
474
76
    italic = 1;
475
3.53k
  if (strstr(fontname, "Oblique"))
476
0
    italic = 1;
477
478
3.53k
  if (fontdesc->flags & PDF_FD_FIXED_PITCH)
479
93
    mono = 1;
480
3.53k
  if (fontdesc->flags & PDF_FD_SERIF)
481
546
    serif = 1;
482
3.53k
  if (fontdesc->flags & PDF_FD_ITALIC)
483
132
    italic = 1;
484
3.53k
  if (fontdesc->flags & PDF_FD_FORCE_BOLD)
485
27
    bold = 1;
486
487
3.53k
  if (collection)
488
1.21k
  {
489
1.21k
    if (!strcmp(collection, "Adobe-CNS1"))
490
42
      pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_CNS, serif);
491
1.16k
    else if (!strcmp(collection, "Adobe-GB1"))
492
26
      pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_GB, serif);
493
1.14k
    else if (!strcmp(collection, "Adobe-Japan1"))
494
136
      pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_JAPAN, serif);
495
1.00k
    else if (!strcmp(collection, "Adobe-Korea1"))
496
47
      pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_KOREA, serif);
497
959
    else
498
959
    {
499
959
      size_t i;
500
959
      if (strcmp(collection, "Adobe-Identity") != 0)
501
206
        fz_warn(ctx, "unknown cid collection: %s", collection);
502
503
      // Recognize common CJK fonts when using Identity or other non-CJK CMap
504
12.2k
      for (i = 0; i < nelem(known_cjk_fonts); ++i)
505
11.3k
      {
506
11.3k
        if (match_font_name(fontname, known_cjk_fonts[i].name))
507
40
        {
508
40
          pdf_load_substitute_cjk_font(ctx, fontdesc, fontname,
509
40
            known_cjk_fonts[i].ros, known_cjk_fonts[i].serif);
510
40
          return;
511
40
        }
512
11.3k
      }
513
514
919
      pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic);
515
919
    }
516
1.21k
  }
517
2.32k
  else
518
2.32k
  {
519
2.32k
    pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic);
520
2.32k
  }
521
3.53k
}
522
523
42
/*
 * Read big-endian 16/32-bit unsigned values from a byte pointer.
 * Each byte is converted to the unsigned result type *before* it is shifted,
 * so a top byte >= 0x80 is never shifted into the sign bit of an int.
 * The argument is evaluated more than once; pass expressions without side
 * effects.
 */
#define TTF_U16(p) ((uint16_t) (((uint16_t)(p)[0] << 8) | (uint16_t)(p)[1]))
#define TTF_U32(p) ((uint32_t) (((uint32_t)(p)[0] << 24) | ((uint32_t)(p)[1] << 16) | ((uint32_t)(p)[2] << 8) | (uint32_t)(p)[3]))
525
526
static fz_buffer *
527
pdf_extract_cff_subtable(fz_context *ctx, unsigned char *data, size_t size)
528
42
{
529
42
  size_t num_tables = TTF_U16(data + 4);
530
42
  size_t i;
531
532
42
  if (12 + num_tables * 16 > size)
533
4
    fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid TTF header");
534
535
308
  for (i = 0; i < num_tables; ++i)
536
306
  {
537
306
    unsigned char *record = data + 12 + i * 16;
538
306
    if (!memcmp("CFF ", record, 4))
539
36
    {
540
36
      uint64_t offset = TTF_U32(record + 8);
541
36
      uint64_t length = TTF_U32(record + 12);
542
36
      uint64_t end = offset + length;
543
36
      if (end > size)
544
7
        fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid TTF subtable offset/length");
545
29
      return fz_new_buffer_from_copied_data(ctx, data + offset, length);
546
36
    }
547
306
  }
548
549
2
  return NULL;
550
38
}
551
552
static void
553
pdf_load_embedded_font(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, const char *fontname, pdf_obj *stmref)
554
7.26k
{
555
7.26k
  fz_buffer *buf;
556
7.26k
  unsigned char *data;
557
7.26k
  size_t size;
558
559
7.26k
  fz_var(buf);
560
561
7.26k
  buf = pdf_load_stream(ctx, stmref);
562
563
13.5k
  fz_try(ctx)
564
13.5k
  {
565
    /* Extract CFF subtable for OpenType fonts: */
566
6.75k
    size = fz_buffer_storage(ctx, buf, &data);
567
6.75k
    if (size > 12) {
568
6.45k
      if (!memcmp("OTTO", data, 4)) {
569
42
        fz_buffer *cff = pdf_extract_cff_subtable(ctx, data, size);
570
42
        if (cff)
571
29
        {
572
29
          fz_drop_buffer(ctx, buf);
573
29
          buf = cff;
574
29
        }
575
42
      }
576
6.45k
    }
577
578
6.75k
    fontdesc->font = fz_new_font_from_buffer(ctx, fontname, buf, 0, 1);
579
6.75k
  }
580
13.5k
  fz_always(ctx)
581
6.75k
    fz_drop_buffer(ctx, buf);
582
6.75k
  fz_catch(ctx)
583
2.23k
    fz_rethrow(ctx);
584
585
5.02k
  fontdesc->size += fz_buffer_storage(ctx, buf, NULL);
586
5.02k
  fontdesc->is_embedded = 1;
587
5.02k
}
588
589
/*
590
 * Create and destroy
591
 */
592
593
pdf_font_desc *
594
pdf_keep_font(fz_context *ctx, pdf_font_desc *fontdesc)
595
232k
{
596
232k
  return fz_keep_storable(ctx, &fontdesc->storable);
597
232k
}
598
599
void
600
pdf_drop_font(fz_context *ctx, pdf_font_desc *fontdesc)
601
844k
{
602
844k
  fz_drop_storable(ctx, &fontdesc->storable);
603
844k
}
604
605
static int
606
pdf_font_is_droppable(fz_context *ctx, fz_storable *fontdesc)
607
5
{
608
  /* If we aren't holding the FT lock, then we can drop. */
609
5
  return !fz_ft_lock_held(ctx);
610
5
}
611
612
static void
613
pdf_drop_font_imp(fz_context *ctx, fz_storable *fontdesc_)
614
12.4k
{
615
12.4k
  pdf_font_desc *fontdesc = (pdf_font_desc *)fontdesc_;
616
617
12.4k
  fz_drop_font(ctx, fontdesc->font);
618
12.4k
  pdf_drop_cmap(ctx, fontdesc->encoding);
619
12.4k
  pdf_drop_cmap(ctx, fontdesc->to_ttf_cmap);
620
12.4k
  pdf_drop_cmap(ctx, fontdesc->to_unicode);
621
12.4k
  fz_free(ctx, fontdesc->cid_to_gid);
622
12.4k
  fz_free(ctx, fontdesc->cid_to_ucs);
623
12.4k
  fz_free(ctx, fontdesc->hmtx);
624
12.4k
  fz_free(ctx, fontdesc->vmtx);
625
12.4k
  fz_free(ctx, fontdesc);
626
12.4k
}
627
628
pdf_font_desc *
629
pdf_new_font_desc(fz_context *ctx)
630
12.4k
{
631
12.4k
  pdf_font_desc *fontdesc;
632
633
12.4k
  fontdesc = fz_malloc_struct(ctx, pdf_font_desc);
634
12.4k
  FZ_INIT_AWKWARD_STORABLE(fontdesc, 1, pdf_drop_font_imp, pdf_font_is_droppable);
635
12.4k
  fontdesc->size = sizeof(pdf_font_desc);
636
637
12.4k
  fontdesc->font = NULL;
638
639
12.4k
  fontdesc->flags = 0;
640
12.4k
  fontdesc->italic_angle = 0;
641
12.4k
  fontdesc->ascent = 800;
642
12.4k
  fontdesc->descent = -200;
643
12.4k
  fontdesc->cap_height = 800;
644
12.4k
  fontdesc->x_height = 500;
645
12.4k
  fontdesc->missing_width = 0;
646
647
12.4k
  fontdesc->encoding = NULL;
648
12.4k
  fontdesc->to_ttf_cmap = NULL;
649
12.4k
  fontdesc->cid_to_gid_len = 0;
650
12.4k
  fontdesc->cid_to_gid = NULL;
651
652
12.4k
  fontdesc->to_unicode = NULL;
653
12.4k
  fontdesc->cid_to_ucs_len = 0;
654
12.4k
  fontdesc->cid_to_ucs = NULL;
655
656
12.4k
  fontdesc->wmode = 0;
657
658
12.4k
  fontdesc->hmtx_cap = 0;
659
12.4k
  fontdesc->vmtx_cap = 0;
660
12.4k
  fontdesc->hmtx_len = 0;
661
12.4k
  fontdesc->vmtx_len = 0;
662
12.4k
  fontdesc->hmtx = NULL;
663
12.4k
  fontdesc->vmtx = NULL;
664
665
12.4k
  fontdesc->dhmtx.lo = 0x0000;
666
12.4k
  fontdesc->dhmtx.hi = 0xFFFF;
667
12.4k
  fontdesc->dhmtx.w = 1000;
668
669
12.4k
  fontdesc->dvmtx.lo = 0x0000;
670
12.4k
  fontdesc->dvmtx.hi = 0xFFFF;
671
12.4k
  fontdesc->dvmtx.x = 0;
672
12.4k
  fontdesc->dvmtx.y = 880;
673
12.4k
  fontdesc->dvmtx.w = -1000;
674
675
12.4k
  fontdesc->is_embedded = 0;
676
677
12.4k
  return fontdesc;
678
12.4k
}
679
680
/*
681
 * Simple fonts (Type1 and TrueType)
682
 */
683
684
static FT_CharMap
685
select_type1_cmap(FT_Face face)
686
7.80k
{
687
7.80k
  int i;
688
15.5k
  for (i = 0; i < face->num_charmaps; i++)
689
15.5k
    if (face->charmaps[i]->platform_id == 7)
690
7.79k
      return face->charmaps[i];
691
6
  if (face->num_charmaps > 0)
692
1
    return face->charmaps[0];
693
5
  return NULL;
694
6
}
695
696
static FT_CharMap
697
select_truetype_cmap(fz_context *ctx, FT_Face face, int symbolic)
698
1.98k
{
699
1.98k
  int i;
700
701
  /* First look for a Microsoft symbolic cmap, if applicable */
702
1.98k
  if (symbolic)
703
715
  {
704
3.04k
    for (i = 0; i < face->num_charmaps; i++)
705
2.40k
      if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 0)
706
69
        return face->charmaps[i];
707
715
  }
708
709
1.91k
  fz_ft_lock(ctx);
710
711
  /* Then look for a Microsoft Unicode cmap */
712
5.39k
  for (i = 0; i < face->num_charmaps; i++)
713
4.14k
    if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 1)
714
788
      if (FT_Get_CMap_Format(face->charmaps[i]) != -1)
715
674
      {
716
674
        fz_ft_unlock(ctx);
717
674
        return face->charmaps[i];
718
674
      }
719
720
  /* Finally look for an Apple MacRoman cmap */
721
3.03k
  for (i = 0; i < face->num_charmaps; i++)
722
2.56k
    if (face->charmaps[i]->platform_id == 1 && face->charmaps[i]->encoding_id == 0)
723
773
      if (FT_Get_CMap_Format(face->charmaps[i]) != -1)
724
773
      {
725
773
        fz_ft_unlock(ctx);
726
773
        return face->charmaps[i];
727
773
      }
728
729
471
  if (face->num_charmaps > 0)
730
57
    if (FT_Get_CMap_Format(face->charmaps[0]) != -1)
731
33
    {
732
33
      fz_ft_unlock(ctx);
733
33
      return face->charmaps[0];
734
33
    }
735
736
438
  fz_ft_unlock(ctx);
737
438
  return NULL;
738
471
}
739
740
static FT_CharMap
741
select_unknown_cmap(FT_Face face)
742
0
{
743
0
  if (face->num_charmaps > 0)
744
0
    return face->charmaps[0];
745
0
  return NULL;
746
0
}
747
748
/*
 * Decide whether the S22PDF encoding workaround applies to a font: it does
 * when the font has a descriptor, its Encoding entry is the name
 * WinAnsiEncoding, and the descriptor's Flags value is exactly 4.
 * Returns 1 if so, 0 otherwise.
 */
static int use_s22pdf_workaround(fz_context *ctx, pdf_obj *dict, pdf_obj *descriptor)
{
  if (!descriptor)
    return 0;
  if (pdf_dict_get(ctx, dict, PDF_NAME(Encoding)) != PDF_NAME(WinAnsiEncoding))
    return 0;
  return pdf_dict_get_int(ctx, descriptor, PDF_NAME(Flags)) == 4;
}
760
761
static pdf_font_desc *
762
pdf_load_simple_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
763
9.78k
{
764
9.78k
  const char *basefont;
765
9.78k
  pdf_obj *descriptor;
766
9.78k
  pdf_obj *encoding;
767
9.78k
  pdf_obj *widths;
768
9.78k
  unsigned short *etable = NULL;
769
9.78k
  pdf_font_desc *fontdesc = NULL;
770
9.78k
  pdf_obj *subtype;
771
9.78k
  FT_Face face;
772
9.78k
  FT_CharMap cmap;
773
9.78k
  int symbolic;
774
9.78k
  int kind;
775
9.78k
  int glyph;
776
777
9.78k
  const char *estrings[256];
778
9.78k
  char ebuffer[256][32];
779
9.78k
  int i, k, n;
780
9.78k
  int fterr;
781
9.78k
  int has_lock = 0;
782
783
9.78k
  fz_var(fontdesc);
784
9.78k
  fz_var(etable);
785
9.78k
  fz_var(has_lock);
786
787
  /* Load font file */
788
19.5k
  fz_try(ctx)
789
19.5k
  {
790
9.78k
    fontdesc = pdf_new_font_desc(ctx);
791
792
9.78k
    basefont = pdf_dict_get_name(ctx, dict, PDF_NAME(BaseFont));
793
794
9.78k
    descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
795
9.78k
    if (descriptor)
796
6.29k
      pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, NULL, basefont, 0);
797
3.49k
    else
798
3.49k
      pdf_load_builtin_font(ctx, fontdesc, basefont, 0);
799
800
    /* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */
801
9.78k
    if (use_s22pdf_workaround(ctx, dict, descriptor))
802
142
    {
803
142
      char *cp936fonts[] = {
804
142
        "\xCB\xCE\xCC\xE5", "SimSun,Regular",
805
142
        "\xBA\xDA\xCC\xE5", "SimHei,Regular",
806
142
        "\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular",
807
142
        "\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular",
808
142
        "\xC1\xA5\xCA\xE9", "SimLi,Regular",
809
142
        NULL
810
142
      };
811
852
      for (i = 0; cp936fonts[i]; i += 2)
812
710
        if (!strcmp(basefont, cp936fonts[i]))
813
0
          break;
814
142
      if (cp936fonts[i])
815
0
      {
816
0
        fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings");
817
0
        pdf_drop_font(ctx, fontdesc);
818
0
        fontdesc = NULL;
819
0
        fontdesc = pdf_new_font_desc(ctx);
820
0
        pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, "Adobe-GB1", cp936fonts[i+1], 0);
821
0
        fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H");
822
0
        fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
823
0
        fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
824
825
0
        goto skip_encoding;
826
0
      }
827
142
    }
828
829
9.78k
    face = fontdesc->font->ft_face;
830
9.78k
    kind = ft_kind(ctx, face);
831
832
    /* Encoding */
833
834
9.78k
    symbolic = fontdesc->flags & 4;
835
    /* Bug 703273: If non-symbolic, we're not symbolic. */
836
9.78k
    if (fontdesc->flags & 32)
837
2.69k
      symbolic = 0;
838
839
9.78k
    if (kind == TYPE1)
840
7.80k
      cmap = select_type1_cmap(face);
841
1.98k
    else if (kind == TRUETYPE)
842
1.98k
      cmap = select_truetype_cmap(ctx, face, symbolic);
843
0
    else
844
0
      cmap = select_unknown_cmap(face);
845
846
9.78k
    if (cmap)
847
9.34k
    {
848
9.34k
      fz_ft_lock(ctx);
849
9.34k
      fterr = FT_Set_Charmap(face, cmap);
850
9.34k
      fz_ft_unlock(ctx);
851
9.34k
      if (fterr)
852
0
        fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr));
853
9.34k
    }
854
443
    else
855
443
      fz_warn(ctx, "freetype could not find any cmaps");
856
857
    /* FIXME: etable may leak on error. */
858
9.78k
    etable = Memento_label(fz_malloc_array(ctx, 256, unsigned short), "cid_to_gid");
859
9.78k
    fontdesc->size += 256 * sizeof(unsigned short);
860
2.51M
    for (i = 0; i < 256; i++)
861
2.50M
    {
862
2.50M
      estrings[i] = NULL;
863
2.50M
      etable[i] = 0;
864
2.50M
    }
865
866
9.78k
    encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding));
867
9.78k
    if (encoding)
868
5.36k
    {
869
5.36k
      if (pdf_is_name(ctx, encoding))
870
3.40k
        pdf_load_encoding(estrings, pdf_to_name(ctx, encoding));
871
872
5.36k
      if (pdf_is_dict(ctx, encoding))
873
1.69k
      {
874
1.69k
        pdf_obj *base, *diff, *item;
875
876
1.69k
        base = pdf_dict_get(ctx, encoding, PDF_NAME(BaseEncoding));
877
1.69k
        if (pdf_is_name(ctx, base))
878
152
          pdf_load_encoding(estrings, pdf_to_name(ctx, base));
879
1.54k
        else if (!fontdesc->is_embedded && !symbolic)
880
348
          pdf_load_encoding(estrings, "StandardEncoding");
881
882
1.69k
        diff = pdf_dict_get(ctx, encoding, PDF_NAME(Differences));
883
1.69k
        if (pdf_is_array(ctx, diff))
884
1.65k
        {
885
1.65k
          n = pdf_array_len(ctx, diff);
886
1.65k
          k = 0;
887
125k
          for (i = 0; i < n; i++)
888
123k
          {
889
123k
            item = pdf_array_get(ctx, diff, i);
890
123k
            if (pdf_is_int(ctx, item))
891
11.9k
              k = pdf_to_int(ctx, item);
892
123k
            if (pdf_is_name(ctx, item) && k >= 0 && k < (int)nelem(estrings))
893
110k
              estrings[k++] = pdf_to_name(ctx, item);
894
123k
          }
895
1.65k
        }
896
1.69k
      }
897
5.36k
    }
898
4.42k
    else if (!fontdesc->is_embedded && !symbolic)
899
2.67k
      pdf_load_encoding(estrings, "StandardEncoding");
900
901
9.78k
    fz_ft_lock(ctx);
902
9.78k
    has_lock = 1;
903
904
    /* start with the builtin encoding */
905
2.51M
    for (i = 0; i < 256; i++)
906
2.50M
      etable[i] = ft_char_index(face, i);
907
908
    /* built-in and substitute fonts may be a different type than what the document expects */
909
9.78k
    subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
910
9.78k
    if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1)))
911
4.01k
      kind = TYPE1;
912
5.77k
    else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1)))
913
179
      kind = TYPE1;
914
5.59k
    else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)))
915
2.74k
      kind = TRUETYPE;
916
2.85k
    else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0)))
917
0
      kind = TYPE1;
918
2.85k
    else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2)))
919
14
      kind = TRUETYPE;
920
921
    /* encode by glyph name where we can */
922
9.78k
    if (kind == TYPE1)
923
6.84k
    {
924
1.75M
      for (i = 0; i < 256; i++)
925
1.75M
      {
926
1.75M
        if (estrings[i])
927
864k
        {
928
864k
          glyph = ft_name_index(face, estrings[i]);
929
864k
          if (glyph > 0)
930
737k
            etable[i] = glyph;
931
864k
        }
932
1.75M
      }
933
6.84k
    }
934
935
    /* encode by glyph name where we can */
936
9.78k
    if (kind == TRUETYPE)
937
2.94k
    {
938
      /* Unicode cmap */
939
2.94k
      if (!symbolic && face->charmap && face->charmap->platform_id == 3)
940
681
      {
941
175k
        for (i = 0; i < 256; i++)
942
174k
        {
943
174k
          if (estrings[i])
944
140k
          {
945
140k
            glyph = ft_find_glyph_by_unicode_name(face, estrings[i]);
946
140k
            if (glyph > 0)
947
41.7k
              etable[i] = glyph;
948
140k
          }
949
174k
        }
950
681
      }
951
952
      /* MacRoman cmap */
953
2.26k
      else if (!symbolic && face->charmap && face->charmap->platform_id == 1)
954
288
      {
955
74.0k
        for (i = 0; i < 256; i++)
956
73.7k
        {
957
73.7k
          if (estrings[i])
958
33.2k
          {
959
33.2k
            int mrcode = lookup_mre_code(estrings[i]);
960
33.2k
            glyph = 0;
961
33.2k
            if (mrcode > 0)
962
32.8k
              glyph = ft_char_index(face, mrcode);
963
33.2k
            if (glyph == 0)
964
26.6k
              glyph = ft_name_index(face, estrings[i]);
965
33.2k
            if (glyph > 0)
966
6.76k
              etable[i] = glyph;
967
33.2k
          }
968
73.7k
        }
969
288
      }
970
971
      /* Symbolic cmap */
972
1.97k
      else if (!face->charmap || face->charmap->encoding != FT_ENCODING_MS_SYMBOL)
973
1.90k
      {
974
490k
        for (i = 0; i < 256; i++)
975
488k
        {
976
488k
          if (estrings[i])
977
226k
          {
978
226k
            glyph = ft_name_index(face, estrings[i]);
979
226k
            if (glyph > 0)
980
180k
              etable[i] = glyph;
981
226k
          }
982
488k
        }
983
1.90k
      }
984
2.94k
    }
985
986
    /* try to reverse the glyph names from the builtin encoding */
987
2.51M
    for (i = 0; i < 256; i++)
988
2.50M
    {
989
2.50M
      if (etable[i] && !estrings[i])
990
293k
      {
991
293k
        if (FT_HAS_GLYPH_NAMES(face))
992
235k
        {
993
235k
          fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32);
994
235k
          if (fterr)
995
0
            fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr));
996
235k
          if (ebuffer[i][0])
997
235k
            estrings[i] = ebuffer[i];
998
235k
        }
999
57.3k
        else
1000
57.3k
        {
1001
57.3k
          estrings[i] = (char*) fz_glyph_name_from_win_ansi[i]; /* discard const */
1002
57.3k
        }
1003
293k
      }
1004
2.50M
    }
1005
1006
    /* symbolic Type 1 fonts with an implicit encoding and non-standard glyph names */
1007
9.78k
    if (kind == TYPE1 && symbolic)
1008
2.02k
    {
1009
521k
      for (i = 0; i < 256; i++)
1010
519k
        if (etable[i] && estrings[i] && !fz_unicode_from_glyph_name(estrings[i]))
1011
0
          estrings[i] = (char*) fz_glyph_name_from_adobe_standard[i];
1012
2.02k
    }
1013
1014
9.78k
    fz_ft_unlock(ctx);
1015
9.78k
    has_lock = 0;
1016
1017
9.78k
    fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1);
1018
9.78k
    fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);
1019
9.78k
    fontdesc->cid_to_gid_len = 256;
1020
9.78k
    fontdesc->cid_to_gid = etable;
1021
1022
19.5k
    fz_try(ctx)
1023
19.5k
    {
1024
9.78k
      pdf_load_to_unicode(ctx, doc, fontdesc, estrings, NULL, pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode)));
1025
9.78k
    }
1026
19.5k
    fz_catch(ctx)
1027
1
    {
1028
1
      fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1029
1
      fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
1030
1
      fz_report_error(ctx);
1031
1
      fz_warn(ctx, "cannot load ToUnicode CMap");
1032
1
    }
1033
1034
9.78k
  skip_encoding:
1035
1036
    /* Widths */
1037
1038
9.78k
    pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width);
1039
1040
9.78k
    widths = pdf_dict_get(ctx, dict, PDF_NAME(Widths));
1041
9.78k
    if (widths)
1042
5.36k
    {
1043
5.36k
      int first, last;
1044
1045
5.36k
      first = pdf_dict_get_int(ctx, dict, PDF_NAME(FirstChar));
1046
5.36k
      last = pdf_dict_get_int(ctx, dict, PDF_NAME(LastChar));
1047
1048
5.36k
      if (first < 0 || last > 255 || first > last)
1049
195
        first = last = 0;
1050
1051
499k
      for (i = 0; i < last - first + 1; i++)
1052
494k
      {
1053
494k
        int wid = pdf_array_get_int(ctx, widths, i);
1054
494k
        pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid);
1055
494k
      }
1056
5.36k
    }
1057
4.42k
    else
1058
4.42k
    {
1059
4.42k
      fz_ft_lock(ctx);
1060
4.42k
      has_lock = 1;
1061
1.13M
      for (i = 0; i < 256; i++)
1062
1.13M
        pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i));
1063
4.42k
      fz_ft_unlock(ctx);
1064
4.42k
      has_lock = 0;
1065
4.42k
    }
1066
1067
9.78k
    pdf_end_hmtx(ctx, fontdesc);
1068
9.78k
  }
1069
19.5k
  fz_catch(ctx)
1070
0
  {
1071
0
    if (has_lock)
1072
0
      fz_ft_unlock(ctx);
1073
0
    if (fontdesc && etable != fontdesc->cid_to_gid)
1074
0
      fz_free(ctx, etable);
1075
0
    pdf_drop_font(ctx, fontdesc);
1076
0
    fz_rethrow(ctx);
1077
0
  }
1078
9.78k
  return fontdesc;
1079
9.78k
}
1080
1081
static int
1082
hail_mary_make_hash_key(fz_context *ctx, fz_store_hash *hash, void *key_)
1083
36.6k
{
1084
36.6k
  hash->u.pi.i = 0;
1085
36.6k
  hash->u.pi.ptr = NULL;
1086
36.6k
  return 1;
1087
36.6k
}
1088
1089
static void *
1090
hail_mary_keep_key(fz_context *ctx, void *key)
1091
2.31k
{
1092
2.31k
  return key;
1093
2.31k
}
1094
1095
static void
1096
hail_mary_drop_key(fz_context *ctx, void *key)
1097
2.31k
{
1098
2.31k
}
1099
1100
static int
1101
hail_mary_cmp_key(fz_context *ctx, void *k0, void *k1)
1102
0
{
1103
0
  return k0 == k1;
1104
0
}
1105
1106
static void
1107
hail_mary_format_key(fz_context *ctx, char *s, size_t n, void *key_)
1108
0
{
1109
0
  fz_strlcpy(s, "(hail mary font)", n);
1110
0
}
1111
1112
static int hail_mary_store_key; /* Dummy */
1113
1114
static const fz_store_type hail_mary_store_type =
1115
{
1116
  "hail-mary",
1117
  hail_mary_make_hash_key,
1118
  hail_mary_keep_key,
1119
  hail_mary_drop_key,
1120
  hail_mary_cmp_key,
1121
  hail_mary_format_key,
1122
  NULL
1123
};
1124
1125
pdf_font_desc *
1126
pdf_load_hail_mary_font(fz_context *ctx, pdf_document *doc)
1127
31.9k
{
1128
31.9k
  pdf_font_desc *fontdesc;
1129
31.9k
  pdf_font_desc *existing;
1130
1131
31.9k
  if ((fontdesc = fz_find_item(ctx, pdf_drop_font_imp, &hail_mary_store_key, &hail_mary_store_type)) != NULL)
1132
29.6k
  {
1133
29.6k
    return fontdesc;
1134
29.6k
  }
1135
1136
  /* FIXME: Get someone with a clue about fonts to fix this */
1137
2.31k
  fontdesc = pdf_load_simple_font(ctx, doc, NULL);
1138
1139
2.31k
  existing = fz_store_item(ctx, &hail_mary_store_key, fontdesc, fontdesc->size, &hail_mary_store_type);
1140
2.31k
  assert(existing == NULL);
1141
2.31k
  (void)existing; /* Silence warning in release builds */
1142
1143
2.31k
  return fontdesc;
1144
2.31k
}
1145
1146
/*
1147
 * CID Fonts
1148
 */
1149
1150
static pdf_font_desc *
1151
load_cid_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode)
1152
2.46k
{
1153
2.46k
  pdf_obj *widths;
1154
2.46k
  pdf_obj *descriptor;
1155
2.46k
  pdf_font_desc *fontdesc = NULL;
1156
2.46k
  fz_buffer *buf = NULL;
1157
2.46k
  pdf_cmap *cmap;
1158
2.46k
  FT_Face face;
1159
2.46k
  char collection[256];
1160
2.46k
  const char *basefont;
1161
2.46k
  int i, k, fterr;
1162
2.46k
  pdf_obj *cidtogidmap;
1163
2.46k
  pdf_obj *obj;
1164
2.46k
  int dw;
1165
1166
2.46k
  fz_var(fontdesc);
1167
2.46k
  fz_var(buf);
1168
1169
4.93k
  fz_try(ctx)
1170
4.93k
  {
1171
    /* Get font name and CID collection */
1172
1173
2.46k
    basefont = pdf_dict_get_name(ctx, dict, PDF_NAME(BaseFont));
1174
1175
2.46k
    {
1176
2.46k
      pdf_obj *cidinfo;
1177
2.46k
      const char *reg, *ord;
1178
1179
2.46k
      cidinfo = pdf_dict_get(ctx, dict, PDF_NAME(CIDSystemInfo));
1180
2.46k
      if (cidinfo)
1181
2.29k
      {
1182
2.29k
        reg = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Registry), NULL);
1183
2.29k
        ord = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Ordering), NULL);
1184
2.29k
        fz_snprintf(collection, sizeof collection, "%s-%s", reg, ord);
1185
2.29k
      }
1186
173
      else
1187
173
      {
1188
173
        fz_warn(ctx, "CIDFont is missing CIDSystemInfo dictionary; assuming Adobe-Identity");
1189
173
        fz_strlcpy(collection, "Adobe-Identity", sizeof collection);
1190
173
      }
1191
2.46k
    }
1192
1193
    /* Encoding */
1194
1195
2.46k
    if (pdf_is_name(ctx, encoding))
1196
2.13k
    {
1197
2.13k
      cmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, encoding));
1198
2.13k
    }
1199
331
    else if (pdf_is_indirect(ctx, encoding))
1200
111
    {
1201
111
      cmap = pdf_load_embedded_cmap(ctx, doc, encoding);
1202
111
    }
1203
220
    else
1204
220
    {
1205
220
      fz_throw(ctx, FZ_ERROR_SYNTAX, "font missing encoding");
1206
220
    }
1207
1208
    /* Load font file */
1209
1210
2.24k
    fontdesc = pdf_new_font_desc(ctx);
1211
1212
2.24k
    fontdesc->encoding = cmap;
1213
2.24k
    fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);
1214
1215
2.24k
    pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding));
1216
1217
2.24k
    descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
1218
2.24k
    if (!descriptor)
1219
42
      fz_throw(ctx, FZ_ERROR_SYNTAX, "missing font descriptor");
1220
2.20k
    pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, collection, basefont, 1);
1221
1222
2.20k
    face = fontdesc->font->ft_face;
1223
1224
    /* Apply encoding */
1225
1226
2.20k
    cidtogidmap = pdf_dict_get(ctx, dict, PDF_NAME(CIDToGIDMap));
1227
2.20k
    if (pdf_is_stream(ctx, cidtogidmap))
1228
143
    {
1229
143
      size_t z, len;
1230
143
      unsigned char *data;
1231
1232
143
      buf = pdf_load_stream(ctx, cidtogidmap);
1233
1234
143
      len = fz_buffer_storage(ctx, buf, &data);
1235
143
      fontdesc->cid_to_gid_len = len / 2;
1236
143
      fontdesc->cid_to_gid = Memento_label(fz_malloc_array(ctx, fontdesc->cid_to_gid_len, unsigned short), "cid_to_gid_map");
1237
143
      fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short);
1238
1.96M
      for (z = 0; z < fontdesc->cid_to_gid_len; z++)
1239
1.96M
        fontdesc->cid_to_gid[z] = (data[z * 2] << 8) + data[z * 2 + 1];
1240
143
    }
1241
2.06k
    else if (cidtogidmap && !pdf_name_eq(ctx, PDF_NAME(Identity), cidtogidmap))
1242
19
    {
1243
19
      fz_warn(ctx, "ignoring unknown CIDToGIDMap entry");
1244
19
    }
1245
1246
    /* if font is external, cidtogidmap should not be identity */
1247
    /* so we map from cid to unicode and then map that through the (3 1) */
1248
    /* unicode cmap to get a glyph id */
1249
2.04k
    else if (fontdesc->font->flags.ft_substitute)
1250
1.14k
    {
1251
1.14k
      fz_ft_lock(ctx);
1252
1.14k
      fterr = FT_Select_Charmap(face, ft_encoding_unicode);
1253
1.14k
      fz_ft_unlock(ctx);
1254
1.14k
      if (fterr)
1255
0
        fz_throw(ctx, FZ_ERROR_SYNTAX, "no unicode cmap when emulating CID font: %s", ft_error_string(fterr));
1256
1257
1.14k
      if (!strcmp(collection, "Adobe-CNS1"))
1258
42
        fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
1259
1.10k
      else if (!strcmp(collection, "Adobe-GB1"))
1260
26
        fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
1261
1.07k
      else if (!strcmp(collection, "Adobe-Japan1"))
1262
129
        fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
1263
950
      else if (!strcmp(collection, "Adobe-Japan2"))
1264
1
        fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2");
1265
949
      else if (!strcmp(collection, "Adobe-Korea1"))
1266
47
        fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
1267
1.14k
    }
1268
1269
2.20k
    pdf_load_to_unicode(ctx, doc, fontdesc, NULL, collection, to_unicode);
1270
1271
    /* If we have an identity encoding, we're supposed to use the glyph ids directly.
1272
     * If we only have a substitute font, that won't work.
1273
     * Make a last ditch attempt by using
1274
     * the ToUnicode table if it exists to map via the substitute font's cmap. */
1275
2.20k
    if (strstr(fontdesc->encoding->cmap_name, "Identity-") && fontdesc->font->flags.ft_substitute)
1276
858
    {
1277
858
      if (!fontdesc->to_ttf_cmap)
1278
795
      {
1279
795
        if (fontdesc->to_unicode)
1280
486
        {
1281
          // Use ToUnicode from PDF file if possible.
1282
486
          fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode);
1283
486
        }
1284
309
        else
1285
309
        {
1286
          // Attempt a generic ToUnicode (default MacRoman ordering for TrueType)
1287
309
          fontdesc->to_ttf_cmap = pdf_load_builtin_cmap(ctx, "TrueType-UCS2");
1288
309
        }
1289
795
      }
1290
1291
858
      if (fontdesc->to_ttf_cmap)
1292
858
      {
1293
858
        fz_warn(ctx, "non-embedded font using identity encoding: %s (mapping via %s)", basefont, fontdesc->to_ttf_cmap->cmap_name);
1294
858
        if (!fontdesc->to_unicode)
1295
309
          fontdesc->to_unicode = pdf_keep_cmap(ctx, fontdesc->to_ttf_cmap);
1296
858
      }
1297
0
      else
1298
0
        fz_warn(ctx, "non-embedded font using identity encoding: %s", basefont);
1299
858
    }
1300
1301
    /* Horizontal */
1302
1303
2.20k
    dw = pdf_dict_get_int_default(ctx, dict, PDF_NAME(DW), 1000);
1304
2.20k
    pdf_set_default_hmtx(ctx, fontdesc, dw);
1305
1306
2.20k
    widths = pdf_dict_get(ctx, dict, PDF_NAME(W));
1307
2.20k
    if (widths)
1308
1.73k
    {
1309
1.73k
      int c0, c1, w, n, m;
1310
1311
1.73k
      n = pdf_array_len(ctx, widths);
1312
59.7k
      for (i = 0; i < n; )
1313
57.9k
      {
1314
57.9k
        c0 = pdf_array_get_int(ctx, widths, i);
1315
57.9k
        obj = pdf_array_get(ctx, widths, i + 1);
1316
57.9k
        if (pdf_is_array(ctx, obj))
1317
30.8k
        {
1318
30.8k
          m = pdf_array_len(ctx, obj);
1319
722k
          for (k = 0; k < m; k++)
1320
691k
          {
1321
691k
            w = pdf_array_get_int(ctx, obj, k);
1322
691k
            pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w);
1323
691k
          }
1324
30.8k
          i += 2;
1325
30.8k
        }
1326
27.1k
        else
1327
27.1k
        {
1328
27.1k
          c1 = pdf_to_int(ctx, obj);
1329
27.1k
          w = pdf_array_get_int(ctx, widths, i + 2);
1330
27.1k
          pdf_add_hmtx(ctx, fontdesc, c0, c1, w);
1331
27.1k
          i += 3;
1332
27.1k
        }
1333
57.9k
      }
1334
1.73k
    }
1335
1336
2.20k
    pdf_end_hmtx(ctx, fontdesc);
1337
1338
    /* Vertical */
1339
1340
2.20k
    if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1)
1341
7
    {
1342
7
      int dw2y = 880;
1343
7
      int dw2w = -1000;
1344
1345
7
      obj = pdf_dict_get(ctx, dict, PDF_NAME(DW2));
1346
7
      if (obj)
1347
3
      {
1348
3
        dw2y = pdf_array_get_int(ctx, obj, 0);
1349
3
        dw2w = pdf_array_get_int(ctx, obj, 1);
1350
3
      }
1351
1352
7
      pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w);
1353
1354
7
      widths = pdf_dict_get(ctx, dict, PDF_NAME(W2));
1355
7
      if (widths)
1356
0
      {
1357
0
        int c0, c1, w, x, y, n;
1358
1359
0
        n = pdf_array_len(ctx, widths);
1360
0
        for (i = 0; i < n; )
1361
0
        {
1362
0
          c0 = pdf_array_get_int(ctx, widths, i);
1363
0
          obj = pdf_array_get(ctx, widths, i + 1);
1364
0
          if (pdf_is_array(ctx, obj))
1365
0
          {
1366
0
            int m = pdf_array_len(ctx, obj);
1367
0
            for (k = 0; k * 3 < m; k ++)
1368
0
            {
1369
0
              w = pdf_array_get_int(ctx, obj, k * 3 + 0);
1370
0
              x = pdf_array_get_int(ctx, obj, k * 3 + 1);
1371
0
              y = pdf_array_get_int(ctx, obj, k * 3 + 2);
1372
0
              pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w);
1373
0
            }
1374
0
            i += 2;
1375
0
          }
1376
0
          else
1377
0
          {
1378
0
            c1 = pdf_to_int(ctx, obj);
1379
0
            w = pdf_array_get_int(ctx, widths, i + 2);
1380
0
            x = pdf_array_get_int(ctx, widths, i + 3);
1381
0
            y = pdf_array_get_int(ctx, widths, i + 4);
1382
0
            pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w);
1383
0
            i += 5;
1384
0
          }
1385
0
        }
1386
0
      }
1387
1388
7
      pdf_end_vmtx(ctx, fontdesc);
1389
7
    }
1390
2.20k
  }
1391
4.93k
  fz_always(ctx)
1392
2.46k
    fz_drop_buffer(ctx, buf);
1393
2.46k
  fz_catch(ctx)
1394
474
  {
1395
474
    pdf_drop_font(ctx, fontdesc);
1396
474
    fz_rethrow(ctx);
1397
474
  }
1398
1399
1.73k
  return fontdesc;
1400
2.20k
}
1401
1402
static pdf_font_desc *
1403
pdf_load_type0_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
1404
3.76k
{
1405
3.76k
  pdf_obj *dfonts;
1406
3.76k
  pdf_obj *dfont;
1407
3.76k
  pdf_obj *subtype;
1408
3.76k
  pdf_obj *encoding;
1409
3.76k
  pdf_obj *to_unicode;
1410
1411
3.76k
  dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts));
1412
3.76k
  if (!dfonts)
1413
76
    fz_throw(ctx, FZ_ERROR_SYNTAX, "cid font is missing descendant fonts");
1414
1415
3.68k
  dfont = pdf_array_get(ctx, dfonts, 0);
1416
1417
3.68k
  subtype = pdf_dict_get(ctx, dfont, PDF_NAME(Subtype));
1418
3.68k
  encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding));
1419
3.68k
  to_unicode = pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode));
1420
1421
3.68k
  if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0)))
1422
729
    return load_cid_font(ctx, doc, dfont, encoding, to_unicode);
1423
2.96k
  if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2)))
1424
1.73k
    return load_cid_font(ctx, doc, dfont, encoding, to_unicode);
1425
1.22k
  fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown cid font type");
1426
2.96k
}
1427
1428
/*
1429
 * FontDescriptor
1430
 */
1431
1432
static void
1433
pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict,
1434
  const char *collection, const char *basefont, int iscidfont)
1435
8.45k
{
1436
8.45k
  pdf_obj *obj1, *obj2, *obj3, *obj;
1437
8.45k
  const char *fontname;
1438
8.45k
  FT_Face face;
1439
1440
  /* Prefer BaseFont; don't bother with FontName */
1441
8.45k
  fontname = basefont;
1442
1443
8.45k
  fontdesc->flags = pdf_dict_get_int(ctx, dict, PDF_NAME(Flags));
1444
8.45k
  fontdesc->italic_angle = pdf_dict_get_real(ctx, dict, PDF_NAME(ItalicAngle));
1445
8.45k
  fontdesc->ascent = pdf_dict_get_real(ctx, dict, PDF_NAME(Ascent));
1446
8.45k
  fontdesc->descent = pdf_dict_get_real(ctx, dict, PDF_NAME(Descent));
1447
8.45k
  fontdesc->cap_height = pdf_dict_get_real(ctx, dict, PDF_NAME(CapHeight));
1448
8.45k
  fontdesc->x_height = pdf_dict_get_real(ctx, dict, PDF_NAME(XHeight));
1449
8.45k
  fontdesc->missing_width = pdf_dict_get_real(ctx, dict, PDF_NAME(MissingWidth));
1450
1451
8.45k
  obj1 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile));
1452
8.45k
  obj2 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile2));
1453
8.45k
  obj3 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile3));
1454
8.45k
  obj = obj1 ? obj1 : obj2 ? obj2 : obj3;
1455
1456
8.45k
  if (pdf_is_indirect(ctx, obj))
1457
7.26k
  {
1458
14.5k
    fz_try(ctx)
1459
14.5k
    {
1460
7.26k
      pdf_load_embedded_font(ctx, doc, fontdesc, fontname, obj);
1461
7.26k
    }
1462
14.5k
    fz_catch(ctx)
1463
2.74k
    {
1464
2.74k
      fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1465
2.74k
      fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
1466
2.74k
      fz_report_error(ctx);
1467
2.74k
      fz_warn(ctx, "ignored error when loading embedded font; attempting to load system font");
1468
2.74k
      if (!iscidfont && fontname != pdf_clean_font_name(fontname))
1469
199
        pdf_load_builtin_font(ctx, fontdesc, fontname, 1);
1470
2.54k
      else
1471
2.54k
        pdf_load_system_font(ctx, fontdesc, fontname, collection);
1472
2.74k
    }
1473
7.26k
  }
1474
1.19k
  else
1475
1.19k
  {
1476
1.19k
    if (!iscidfont && fontname != pdf_clean_font_name(fontname))
1477
199
      pdf_load_builtin_font(ctx, fontdesc, fontname, 1);
1478
994
    else
1479
994
      pdf_load_system_font(ctx, fontdesc, fontname, collection);
1480
1.19k
  }
1481
1482
  /* Check for DynaLab fonts that must use hinting */
1483
8.45k
  face = fontdesc->font->ft_face;
1484
8.45k
  if (ft_kind(ctx, face) == TRUETYPE)
1485
2.69k
  {
1486
    /* FreeType's own 'tricky' font detection needs a bit of help */
1487
2.69k
    if (is_dynalab(fontdesc->font->name))
1488
469
      face->face_flags |= FT_FACE_FLAG_TRICKY;
1489
1490
2.69k
    if (fontdesc->ascent == 0.0f)
1491
269
      fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM;
1492
1493
2.69k
    if (fontdesc->descent == 0.0f)
1494
587
      fontdesc->descent = 1000.0f * face->descender / face->units_per_EM;
1495
2.69k
  }
1496
8.45k
}
1497
1498
static void
1499
pdf_make_width_table(fz_context *ctx, pdf_font_desc *fontdesc)
1500
9.88k
{
1501
9.88k
  fz_font *font = fontdesc->font;
1502
9.88k
  int i, k, n, cid, gid;
1503
1504
9.88k
  n = 0;
1505
1.78M
  for (i = 0; i < fontdesc->hmtx_len; i++)
1506
1.77M
  {
1507
19.6M
    for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++)
1508
17.8M
    {
1509
17.8M
      cid = pdf_lookup_cmap(fontdesc->encoding, k);
1510
17.8M
      gid = pdf_font_cid_to_gid(ctx, fontdesc, cid);
1511
17.8M
      if (gid > n)
1512
1.03M
        n = gid;
1513
17.8M
    }
1514
1.77M
  }
1515
1516
9.88k
  font->width_count = n + 1;
1517
9.88k
  font->width_table = Memento_label(fz_malloc_array(ctx, font->width_count, short), "font_widths");
1518
9.88k
  fontdesc->size += font->width_count * sizeof(short);
1519
1520
9.88k
  font->width_default = fontdesc->dhmtx.w;
1521
4.08M
  for (i = 0; i < font->width_count; i++)
1522
4.07M
    font->width_table[i] = -1;
1523
1524
1.78M
  for (i = 0; i < fontdesc->hmtx_len; i++)
1525
1.77M
  {
1526
19.6M
    for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++)
1527
17.8M
    {
1528
17.8M
      cid = pdf_lookup_cmap(fontdesc->encoding, k);
1529
17.8M
      gid = pdf_font_cid_to_gid(ctx, fontdesc, cid);
1530
17.8M
      if (gid >= 0 && gid < font->width_count)
1531
16.8M
        font->width_table[gid] = fz_maxi(fontdesc->hmtx[i].w, font->width_table[gid]);
1532
17.8M
    }
1533
1.77M
  }
1534
1535
4.08M
  for (i = 0; i < font->width_count; i++)
1536
4.07M
    if (font->width_table[i] == -1)
1537
2.57M
      font->width_table[i] = font->width_default;
1538
9.88k
}
1539
1540
pdf_font_desc *
1541
pdf_load_font(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict)
1542
91.6k
{
1543
91.6k
  pdf_obj *subtype;
1544
91.6k
  pdf_obj *dfonts;
1545
91.6k
  pdf_obj *charprocs;
1546
91.6k
  pdf_font_desc *fontdesc = NULL;
1547
91.6k
  int type3 = 0;
1548
1549
91.6k
  if ((fontdesc = pdf_find_item(ctx, pdf_drop_font_imp, dict)) != NULL)
1550
79.9k
  {
1551
79.9k
    if (fontdesc->t3loading)
1552
39
    {
1553
39
      pdf_drop_font(ctx, fontdesc);
1554
39
      fz_throw(ctx, FZ_ERROR_SYNTAX, "recursive type3 font");
1555
39
    }
1556
79.9k
    return fontdesc;
1557
79.9k
  }
1558
1559
11.6k
  subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
1560
11.6k
  dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts));
1561
11.6k
  charprocs = pdf_dict_get(ctx, dict, PDF_NAME(CharProcs));
1562
1563
11.6k
  if (pdf_name_eq(ctx, subtype, PDF_NAME(Type0)))
1564
3.51k
    fontdesc = pdf_load_type0_font(ctx, doc, dict);
1565
8.16k
  else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1)))
1566
4.01k
    fontdesc = pdf_load_simple_font(ctx, doc, dict);
1567
4.15k
  else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1)))
1568
179
    fontdesc = pdf_load_simple_font(ctx, doc, dict);
1569
3.97k
  else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)))
1570
2.74k
    fontdesc = pdf_load_simple_font(ctx, doc, dict);
1571
1.23k
  else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type3)))
1572
434
  {
1573
434
    fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict);
1574
434
    type3 = 1;
1575
434
  }
1576
797
  else if (charprocs)
1577
7
  {
1578
7
    fz_warn(ctx, "unknown font format, guessing type3.");
1579
7
    fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict);
1580
7
    type3 = 1;
1581
7
  }
1582
790
  else if (dfonts)
1583
253
  {
1584
253
    fz_warn(ctx, "unknown font format, guessing type0.");
1585
253
    fontdesc = pdf_load_type0_font(ctx, doc, dict);
1586
253
  }
1587
537
  else
1588
537
  {
1589
537
    fz_warn(ctx, "unknown font format, guessing type1 or truetype.");
1590
537
    fontdesc = pdf_load_simple_font(ctx, doc, dict);
1591
537
  }
1592
1593
19.7k
  fz_try(ctx)
1594
19.7k
  {
1595
    /* Create glyph width table for stretching substitute fonts and text extraction. */
1596
9.88k
    pdf_make_width_table(ctx, fontdesc);
1597
1598
9.88k
    pdf_store_item(ctx, dict, fontdesc, fontdesc->size);
1599
1600
    /* Load CharProcs */
1601
9.88k
    if (type3)
1602
425
    {
1603
425
      fontdesc->t3loading = 1;
1604
850
      fz_try(ctx)
1605
850
        pdf_load_type3_glyphs(ctx, doc, fontdesc);
1606
850
      fz_always(ctx)
1607
425
        fontdesc->t3loading = 0;
1608
425
      fz_catch(ctx)
1609
1
      {
1610
1
        pdf_remove_item(ctx, fontdesc->storable.drop, dict);
1611
1
        fz_rethrow(ctx);
1612
1
      }
1613
425
    }
1614
9.88k
  }
1615
19.7k
  fz_catch(ctx)
1616
1
  {
1617
1
    pdf_drop_font(ctx, fontdesc);
1618
1
    fz_rethrow(ctx);
1619
1
  }
1620
1621
11.6k
  return fontdesc;
1622
11.6k
}
1623
1624
void
1625
pdf_print_font(fz_context *ctx, fz_output *out, pdf_font_desc *fontdesc)
1626
0
{
1627
0
  int i;
1628
1629
0
  fz_write_printf(ctx, out, "fontdesc {\n");
1630
1631
0
  if (fontdesc->font->ft_face)
1632
0
    fz_write_printf(ctx, out, "\tfreetype font\n");
1633
0
  if (fontdesc->font->t3procs)
1634
0
    fz_write_printf(ctx, out, "\ttype3 font\n");
1635
1636
0
  fz_write_printf(ctx, out, "\twmode %d\n", fontdesc->wmode);
1637
0
  fz_write_printf(ctx, out, "\tDW %d\n", fontdesc->dhmtx.w);
1638
1639
0
  fz_write_printf(ctx, out, "\tW {\n");
1640
0
  for (i = 0; i < fontdesc->hmtx_len; i++)
1641
0
    fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d\n",
1642
0
      fontdesc->hmtx[i].lo, fontdesc->hmtx[i].hi, fontdesc->hmtx[i].w);
1643
0
  fz_write_printf(ctx, out, "\t}\n");
1644
1645
0
  if (fontdesc->wmode)
1646
0
  {
1647
0
    fz_write_printf(ctx, out, "\tDW2 [%d %d]\n", fontdesc->dvmtx.y, fontdesc->dvmtx.w);
1648
0
    fz_write_printf(ctx, out, "\tW2 {\n");
1649
0
    for (i = 0; i < fontdesc->vmtx_len; i++)
1650
0
      fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d %d %d\n", fontdesc->vmtx[i].lo, fontdesc->vmtx[i].hi,
1651
0
        fontdesc->vmtx[i].x, fontdesc->vmtx[i].y, fontdesc->vmtx[i].w);
1652
0
    fz_write_printf(ctx, out, "\t}\n");
1653
0
  }
1654
0
}