Coverage Report

Created: 2026-03-31 07:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mupdf/source/pdf/pdf-cmap-load.c
Line
Count
Source
1
// Copyright (C) 2004-2024 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "mupdf/pdf.h"
25
26
#include "cmaps/TrueType-UCS2.h"
27
28
#include <string.h>
29
30
static pdf_cmap *
31
pdf_load_embedded_cmap_imp(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, pdf_cycle_list *cycle_up)
32
1
{
33
1
  pdf_cycle_list cycle;
34
1
  fz_stream *file = NULL;
35
1
  pdf_cmap *cmap = NULL;
36
1
  pdf_cmap *usecmap = NULL;
37
1
  pdf_obj *obj;
38
39
1
  fz_var(file);
40
1
  fz_var(cmap);
41
1
  fz_var(usecmap);
42
43
1
  if ((cmap = pdf_find_item(ctx, pdf_drop_cmap_imp, stmobj)) != NULL)
44
0
    return cmap;
45
46
2
  fz_try(ctx)
47
2
  {
48
1
    file = pdf_open_stream(ctx, stmobj);
49
1
    cmap = pdf_load_cmap(ctx, file);
50
51
1
    obj = pdf_dict_get(ctx, stmobj, PDF_NAME(WMode));
52
1
    if (pdf_is_int(ctx, obj))
53
0
      pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(ctx, obj));
54
55
1
    obj = pdf_dict_get(ctx, stmobj, PDF_NAME(UseCMap));
56
1
    if (pdf_is_name(ctx, obj))
57
0
    {
58
0
      fz_try(ctx)
59
0
      {
60
0
        usecmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, obj));
61
0
        pdf_set_usecmap(ctx, cmap, usecmap);
62
0
      }
63
0
      fz_catch(ctx)
64
0
      {
65
0
        fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
66
0
        fz_report_error(ctx);
67
0
        fz_warn(ctx, "cannot load system CMap: %s", pdf_to_name(ctx, obj));
68
0
      }
69
0
    }
70
1
    else if (pdf_is_indirect(ctx, obj))
71
0
    {
72
0
      if (pdf_cycle(ctx, &cycle, cycle_up, obj))
73
0
        fz_throw(ctx, FZ_ERROR_FORMAT, "recursive CMap");
74
0
      usecmap = pdf_load_embedded_cmap_imp(ctx, doc, obj, &cycle);
75
0
      pdf_set_usecmap(ctx, cmap, usecmap);
76
0
    }
77
1
    else if (strlen(cmap->usecmap_name) > 0)
78
0
    {
79
0
      fz_try(ctx)
80
0
      {
81
0
        usecmap = pdf_load_system_cmap(ctx, cmap->usecmap_name);
82
0
        pdf_set_usecmap(ctx, cmap, usecmap);
83
0
      }
84
0
      fz_catch(ctx)
85
0
      {
86
0
        fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
87
0
        fz_report_error(ctx);
88
0
        fz_warn(ctx, "cannot load system CMap: %s", pdf_to_name(ctx, obj));
89
0
      }
90
0
    }
91
92
1
    pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap));
93
1
  }
94
2
  fz_always(ctx)
95
1
  {
96
1
    fz_drop_stream(ctx, file);
97
1
    pdf_drop_cmap(ctx, usecmap);
98
1
  }
99
1
  fz_catch(ctx)
100
0
  {
101
0
    pdf_drop_cmap(ctx, cmap);
102
0
    fz_rethrow(ctx);
103
0
  }
104
105
1
  return cmap;
106
1
}
107
108
pdf_cmap *
109
pdf_load_embedded_cmap(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj)
110
1
{
111
1
  return pdf_load_embedded_cmap_imp(ctx, doc, stmobj, NULL);
112
1
}
113
114
pdf_cmap *
115
pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes)
116
0
{
117
0
  pdf_cmap *cmap = pdf_new_cmap(ctx);
118
0
  fz_try(ctx)
119
0
  {
120
0
    unsigned int high = (1 << (bytes * 8)) - 1;
121
0
    if (wmode)
122
0
      fz_strlcpy(cmap->cmap_name, "Identity-V", sizeof cmap->cmap_name);
123
0
    else
124
0
      fz_strlcpy(cmap->cmap_name, "Identity-H", sizeof cmap->cmap_name);
125
0
    pdf_add_codespace(ctx, cmap, 0, high, bytes);
126
0
    pdf_map_range_to_range(ctx, cmap, 0, high, 0);
127
0
    pdf_sort_cmap(ctx, cmap);
128
0
    pdf_set_cmap_wmode(ctx, cmap, wmode);
129
0
  }
130
0
  fz_catch(ctx)
131
0
  {
132
0
    pdf_drop_cmap(ctx, cmap);
133
0
    fz_rethrow(ctx);
134
0
  }
135
0
  return cmap;
136
0
}
137
138
#ifdef NO_CJK
139
140
pdf_cmap *
141
pdf_load_builtin_cmap(fz_context *ctx, const char *name)
142
{
143
  if (!strcmp(name, "Identity-H")) return pdf_new_identity_cmap(ctx, 0, 2);
144
  if (!strcmp(name, "Identity-V")) return pdf_new_identity_cmap(ctx, 1, 2);
145
  if (!strcmp(name, "TrueType-UCS2")) return &cmap_TrueType_UCS2;
146
  return NULL;
147
}
148
149
#else
150
151
/* To regenerate this list: :r !bash scripts/runcmapdump.sh */
152
153
#include "cmaps/83pv-RKSJ-H.h"
154
#include "cmaps/90ms-RKSJ-H.h"
155
#include "cmaps/90ms-RKSJ-V.h"
156
#include "cmaps/90msp-RKSJ-H.h"
157
#include "cmaps/90msp-RKSJ-V.h"
158
#include "cmaps/90pv-RKSJ-H.h"
159
#include "cmaps/Add-RKSJ-H.h"
160
#include "cmaps/Add-RKSJ-V.h"
161
#include "cmaps/Adobe-CNS1-UCS2.h"
162
#include "cmaps/Adobe-GB1-UCS2.h"
163
#include "cmaps/Adobe-Japan1-UCS2.h"
164
#include "cmaps/Adobe-Korea1-UCS2.h"
165
#include "cmaps/B5pc-H.h"
166
#include "cmaps/B5pc-V.h"
167
#include "cmaps/CNS-EUC-H.h"
168
#include "cmaps/CNS-EUC-V.h"
169
#include "cmaps/ETen-B5-H.h"
170
#include "cmaps/ETen-B5-V.h"
171
#include "cmaps/ETenms-B5-H.h"
172
#include "cmaps/ETenms-B5-V.h"
173
#include "cmaps/EUC-H.h"
174
#include "cmaps/EUC-V.h"
175
#include "cmaps/Ext-RKSJ-H.h"
176
#include "cmaps/Ext-RKSJ-V.h"
177
#include "cmaps/GB-EUC-H.h"
178
#include "cmaps/GB-EUC-V.h"
179
#include "cmaps/GBK-EUC-H.h"
180
#include "cmaps/GBK-EUC-V.h"
181
#include "cmaps/GBK-X.h"
182
#include "cmaps/GBK2K-H.h"
183
#include "cmaps/GBK2K-V.h"
184
#include "cmaps/GBKp-EUC-H.h"
185
#include "cmaps/GBKp-EUC-V.h"
186
#include "cmaps/GBpc-EUC-H.h"
187
#include "cmaps/GBpc-EUC-V.h"
188
#include "cmaps/H.h"
189
#include "cmaps/HKscs-B5-H.h"
190
#include "cmaps/HKscs-B5-V.h"
191
#include "cmaps/Identity-H.h"
192
#include "cmaps/Identity-V.h"
193
#include "cmaps/KSC-EUC-H.h"
194
#include "cmaps/KSC-EUC-V.h"
195
#include "cmaps/KSCms-UHC-H.h"
196
#include "cmaps/KSCms-UHC-HW-H.h"
197
#include "cmaps/KSCms-UHC-HW-V.h"
198
#include "cmaps/KSCms-UHC-V.h"
199
#include "cmaps/KSCpc-EUC-H.h"
200
#include "cmaps/UniCNS-UCS2-H.h"
201
#include "cmaps/UniCNS-UCS2-V.h"
202
#include "cmaps/UniCNS-UTF16-H.h"
203
#include "cmaps/UniCNS-UTF16-V.h"
204
#include "cmaps/UniCNS-X.h"
205
#include "cmaps/UniGB-UCS2-H.h"
206
#include "cmaps/UniGB-UCS2-V.h"
207
#include "cmaps/UniGB-UTF16-H.h"
208
#include "cmaps/UniGB-UTF16-V.h"
209
#include "cmaps/UniGB-X.h"
210
#include "cmaps/UniJIS-UCS2-H.h"
211
#include "cmaps/UniJIS-UCS2-HW-H.h"
212
#include "cmaps/UniJIS-UCS2-HW-V.h"
213
#include "cmaps/UniJIS-UCS2-V.h"
214
#include "cmaps/UniJIS-UTF16-H.h"
215
#include "cmaps/UniJIS-UTF16-V.h"
216
#include "cmaps/UniJIS-X.h"
217
#include "cmaps/UniKS-UCS2-H.h"
218
#include "cmaps/UniKS-UCS2-V.h"
219
#include "cmaps/UniKS-UTF16-H.h"
220
#include "cmaps/UniKS-UTF16-V.h"
221
#include "cmaps/UniKS-X.h"
222
#include "cmaps/V.h"
223
224
/*
 * Compiled-in CMaps. pdf_load_builtin_cmap() binary-searches this array
 * using strcmp() on each entry's cmap_name, so entries must stay ordered
 * by that name. The array of pointers is never modified, hence const;
 * the pdf_cmap objects it points to remain mutable.
 */
static pdf_cmap *const table[] = {
  &cmap_83pv_RKSJ_H,
  &cmap_90ms_RKSJ_H,
  &cmap_90ms_RKSJ_V,
  &cmap_90msp_RKSJ_H,
  &cmap_90msp_RKSJ_V,
  &cmap_90pv_RKSJ_H,
  &cmap_Add_RKSJ_H,
  &cmap_Add_RKSJ_V,
  &cmap_Adobe_CNS1_UCS2,
  &cmap_Adobe_GB1_UCS2,
  &cmap_Adobe_Japan1_UCS2,
  &cmap_Adobe_Korea1_UCS2,
  &cmap_B5pc_H,
  &cmap_B5pc_V,
  &cmap_CNS_EUC_H,
  &cmap_CNS_EUC_V,
  &cmap_ETen_B5_H,
  &cmap_ETen_B5_V,
  &cmap_ETenms_B5_H,
  &cmap_ETenms_B5_V,
  &cmap_EUC_H,
  &cmap_EUC_V,
  &cmap_Ext_RKSJ_H,
  &cmap_Ext_RKSJ_V,
  &cmap_GB_EUC_H,
  &cmap_GB_EUC_V,
  &cmap_GBK_EUC_H,
  &cmap_GBK_EUC_V,
  &cmap_GBK_X,
  &cmap_GBK2K_H,
  &cmap_GBK2K_V,
  &cmap_GBKp_EUC_H,
  &cmap_GBKp_EUC_V,
  &cmap_GBpc_EUC_H,
  &cmap_GBpc_EUC_V,
  &cmap_H,
  &cmap_HKscs_B5_H,
  &cmap_HKscs_B5_V,
  &cmap_Identity_H,
  &cmap_Identity_V,
  &cmap_KSC_EUC_H,
  &cmap_KSC_EUC_V,
  &cmap_KSCms_UHC_H,
  &cmap_KSCms_UHC_HW_H,
  &cmap_KSCms_UHC_HW_V,
  &cmap_KSCms_UHC_V,
  &cmap_KSCpc_EUC_H,
  &cmap_TrueType_UCS2,
  &cmap_UniCNS_UCS2_H,
  &cmap_UniCNS_UCS2_V,
  &cmap_UniCNS_UTF16_H,
  &cmap_UniCNS_UTF16_V,
  &cmap_UniCNS_X,
  &cmap_UniGB_UCS2_H,
  &cmap_UniGB_UCS2_V,
  &cmap_UniGB_UTF16_H,
  &cmap_UniGB_UTF16_V,
  &cmap_UniGB_X,
  &cmap_UniJIS_UCS2_H,
  &cmap_UniJIS_UCS2_HW_H,
  &cmap_UniJIS_UCS2_HW_V,
  &cmap_UniJIS_UCS2_V,
  &cmap_UniJIS_UTF16_H,
  &cmap_UniJIS_UTF16_V,
  &cmap_UniJIS_X,
  &cmap_UniKS_UCS2_H,
  &cmap_UniKS_UCS2_V,
  &cmap_UniKS_UTF16_H,
  &cmap_UniKS_UTF16_V,
  &cmap_UniKS_X,
  &cmap_V,
};
297
298
pdf_cmap *
299
pdf_load_builtin_cmap(fz_context *ctx, const char *name)
300
1
{
301
1
  int r = nelem(table)-1;
302
1
  int l = 0;
303
6
  while (l <= r)
304
6
  {
305
6
    int m = (l + r) >> 1;
306
6
    int c = strcmp(name, table[m]->cmap_name);
307
6
    if (c < 0)
308
3
      r = m - 1;
309
3
    else if (c > 0)
310
2
      l = m + 1;
311
1
    else
312
1
      return table[m];
313
6
  }
314
0
  return NULL;
315
1
}
316
317
#endif
318
319
pdf_cmap *
320
pdf_load_system_cmap(fz_context *ctx, const char *cmap_name)
321
1
{
322
1
  pdf_cmap *usecmap;
323
1
  pdf_cmap *cmap;
324
325
1
  cmap = pdf_load_builtin_cmap(ctx, cmap_name);
326
1
  if (!cmap)
327
0
    fz_throw(ctx, FZ_ERROR_FORMAT, "no builtin cmap file: %s", cmap_name);
328
329
1
  if (cmap->usecmap_name[0] && !cmap->usecmap)
330
0
  {
331
0
    usecmap = pdf_load_system_cmap(ctx, cmap->usecmap_name);
332
0
    if (!usecmap)
333
0
      fz_throw(ctx, FZ_ERROR_FORMAT, "no builtin cmap file: %s", cmap->usecmap_name);
334
0
    pdf_set_usecmap(ctx, cmap, usecmap);
335
0
  }
336
337
1
  return cmap;
338
1
}