Coverage Report

Created: 2023-06-07 06:20

/src/mupdf/source/pdf/pdf-cmap-load.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2021 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "mupdf/pdf.h"
25
26
#include <string.h>
27
28
static pdf_cmap *
29
pdf_load_embedded_cmap_imp(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, pdf_cycle_list *cycle_up)
30
2.89k
{
31
2.89k
  pdf_cycle_list cycle;
32
2.89k
  fz_stream *file = NULL;
33
2.89k
  pdf_cmap *cmap = NULL;
34
2.89k
  pdf_cmap *usecmap = NULL;
35
2.89k
  pdf_obj *obj;
36
37
2.89k
  fz_var(file);
38
2.89k
  fz_var(cmap);
39
2.89k
  fz_var(usecmap);
40
41
2.89k
  if ((cmap = pdf_find_item(ctx, pdf_drop_cmap_imp, stmobj)) != NULL)
42
37
    return cmap;
43
44
5.70k
  fz_try(ctx)
45
5.70k
  {
46
2.85k
    file = pdf_open_stream(ctx, stmobj);
47
2.85k
    cmap = pdf_load_cmap(ctx, file);
48
49
2.85k
    obj = pdf_dict_get(ctx, stmobj, PDF_NAME(WMode));
50
2.85k
    if (pdf_is_int(ctx, obj))
51
97
      pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(ctx, obj));
52
53
2.85k
    obj = pdf_dict_get(ctx, stmobj, PDF_NAME(UseCMap));
54
2.85k
    if (pdf_is_name(ctx, obj))
55
0
    {
56
0
      usecmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, obj));
57
0
      pdf_set_usecmap(ctx, cmap, usecmap);
58
0
    }
59
2.85k
    else if (pdf_is_indirect(ctx, obj))
60
0
    {
61
0
      if (pdf_cycle(ctx, &cycle, cycle_up, obj))
62
0
        fz_throw(ctx, FZ_ERROR_GENERIC, "recursive CMap");
63
0
      usecmap = pdf_load_embedded_cmap_imp(ctx, doc, obj, &cycle);
64
0
      pdf_set_usecmap(ctx, cmap, usecmap);
65
0
    }
66
67
2.85k
    pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap));
68
2.85k
  }
69
5.70k
  fz_always(ctx)
70
2.85k
  {
71
2.85k
    fz_drop_stream(ctx, file);
72
2.85k
    pdf_drop_cmap(ctx, usecmap);
73
2.85k
  }
74
2.85k
  fz_catch(ctx)
75
3
  {
76
3
    pdf_drop_cmap(ctx, cmap);
77
3
    fz_rethrow(ctx);
78
3
  }
79
80
2.85k
  return cmap;
81
2.85k
}
82
83
pdf_cmap *
84
pdf_load_embedded_cmap(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj)
85
2.89k
{
86
2.89k
  return pdf_load_embedded_cmap_imp(ctx, doc, stmobj, NULL);
87
2.89k
}
88
89
pdf_cmap *
90
pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes)
91
11.5k
{
92
11.5k
  pdf_cmap *cmap = pdf_new_cmap(ctx);
93
23.1k
  fz_try(ctx)
94
23.1k
  {
95
11.5k
    unsigned int high = (1 << (bytes * 8)) - 1;
96
11.5k
    if (wmode)
97
0
      fz_strlcpy(cmap->cmap_name, "Identity-V", sizeof cmap->cmap_name);
98
11.5k
    else
99
11.5k
      fz_strlcpy(cmap->cmap_name, "Identity-H", sizeof cmap->cmap_name);
100
11.5k
    pdf_add_codespace(ctx, cmap, 0, high, bytes);
101
11.5k
    pdf_map_range_to_range(ctx, cmap, 0, high, 0);
102
11.5k
    pdf_sort_cmap(ctx, cmap);
103
11.5k
    pdf_set_cmap_wmode(ctx, cmap, wmode);
104
11.5k
  }
105
23.1k
  fz_catch(ctx)
106
0
  {
107
0
    pdf_drop_cmap(ctx, cmap);
108
0
    fz_rethrow(ctx);
109
0
  }
110
11.5k
  return cmap;
111
11.5k
}
112
113
#ifdef NO_CJK
114
115
pdf_cmap *
116
pdf_load_builtin_cmap(fz_context *ctx, const char *name)
117
{
118
  if (!strcmp(name, "Identity-H")) return pdf_new_identity_cmap(ctx, 0, 2);
119
  if (!strcmp(name, "Identity-V")) return pdf_new_identity_cmap(ctx, 1, 2);
120
  return NULL;
121
}
122
123
#else
124
125
/* To regenerate this list: :r !bash scripts/runcmapdump.sh */
126
127
#include "cmaps/83pv-RKSJ-H.h"
128
#include "cmaps/90ms-RKSJ-H.h"
129
#include "cmaps/90ms-RKSJ-V.h"
130
#include "cmaps/90msp-RKSJ-H.h"
131
#include "cmaps/90msp-RKSJ-V.h"
132
#include "cmaps/90pv-RKSJ-H.h"
133
#include "cmaps/Add-RKSJ-H.h"
134
#include "cmaps/Add-RKSJ-V.h"
135
#include "cmaps/Adobe-CNS1-UCS2.h"
136
#include "cmaps/Adobe-GB1-UCS2.h"
137
#include "cmaps/Adobe-Japan1-UCS2.h"
138
#include "cmaps/Adobe-Korea1-UCS2.h"
139
#include "cmaps/B5pc-H.h"
140
#include "cmaps/B5pc-V.h"
141
#include "cmaps/CNS-EUC-H.h"
142
#include "cmaps/CNS-EUC-V.h"
143
#include "cmaps/ETen-B5-H.h"
144
#include "cmaps/ETen-B5-V.h"
145
#include "cmaps/ETenms-B5-H.h"
146
#include "cmaps/ETenms-B5-V.h"
147
#include "cmaps/EUC-H.h"
148
#include "cmaps/EUC-V.h"
149
#include "cmaps/Ext-RKSJ-H.h"
150
#include "cmaps/Ext-RKSJ-V.h"
151
#include "cmaps/GB-EUC-H.h"
152
#include "cmaps/GB-EUC-V.h"
153
#include "cmaps/GBK-EUC-H.h"
154
#include "cmaps/GBK-EUC-V.h"
155
#include "cmaps/GBK-X.h"
156
#include "cmaps/GBK2K-H.h"
157
#include "cmaps/GBK2K-V.h"
158
#include "cmaps/GBKp-EUC-H.h"
159
#include "cmaps/GBKp-EUC-V.h"
160
#include "cmaps/GBpc-EUC-H.h"
161
#include "cmaps/GBpc-EUC-V.h"
162
#include "cmaps/H.h"
163
#include "cmaps/HKscs-B5-H.h"
164
#include "cmaps/HKscs-B5-V.h"
165
#include "cmaps/Identity-H.h"
166
#include "cmaps/Identity-V.h"
167
#include "cmaps/KSC-EUC-H.h"
168
#include "cmaps/KSC-EUC-V.h"
169
#include "cmaps/KSCms-UHC-H.h"
170
#include "cmaps/KSCms-UHC-HW-H.h"
171
#include "cmaps/KSCms-UHC-HW-V.h"
172
#include "cmaps/KSCms-UHC-V.h"
173
#include "cmaps/KSCpc-EUC-H.h"
174
#include "cmaps/UniCNS-UCS2-H.h"
175
#include "cmaps/UniCNS-UCS2-V.h"
176
#include "cmaps/UniCNS-UTF16-H.h"
177
#include "cmaps/UniCNS-UTF16-V.h"
178
#include "cmaps/UniCNS-X.h"
179
#include "cmaps/UniGB-UCS2-H.h"
180
#include "cmaps/UniGB-UCS2-V.h"
181
#include "cmaps/UniGB-UTF16-H.h"
182
#include "cmaps/UniGB-UTF16-V.h"
183
#include "cmaps/UniGB-X.h"
184
#include "cmaps/UniJIS-UCS2-H.h"
185
#include "cmaps/UniJIS-UCS2-HW-H.h"
186
#include "cmaps/UniJIS-UCS2-HW-V.h"
187
#include "cmaps/UniJIS-UCS2-V.h"
188
#include "cmaps/UniJIS-UTF16-H.h"
189
#include "cmaps/UniJIS-UTF16-V.h"
190
#include "cmaps/UniJIS-X.h"
191
#include "cmaps/UniKS-UCS2-H.h"
192
#include "cmaps/UniKS-UCS2-V.h"
193
#include "cmaps/UniKS-UTF16-H.h"
194
#include "cmaps/UniKS-UTF16-V.h"
195
#include "cmaps/UniKS-X.h"
196
#include "cmaps/V.h"
197
198
/*
 * Built-in CMaps, one entry per header included above.
 *
 * pdf_load_builtin_cmap() binary-searches this array by cmap_name, so the
 * entries must stay in strcmp() order of their names.
 * NOTE(review): assumes each cmap_name matches its identifier with '_'
 * standing in for '-' -- confirm against the generated headers.
 */
static pdf_cmap *table[] = {
  /* 83pv / 90ms / 90msp / 90pv */
  &cmap_83pv_RKSJ_H,
  &cmap_90ms_RKSJ_H, &cmap_90ms_RKSJ_V,
  &cmap_90msp_RKSJ_H, &cmap_90msp_RKSJ_V,
  &cmap_90pv_RKSJ_H,

  /* Add / Adobe */
  &cmap_Add_RKSJ_H, &cmap_Add_RKSJ_V,
  &cmap_Adobe_CNS1_UCS2, &cmap_Adobe_GB1_UCS2,
  &cmap_Adobe_Japan1_UCS2, &cmap_Adobe_Korea1_UCS2,

  /* B5pc / CNS */
  &cmap_B5pc_H, &cmap_B5pc_V,
  &cmap_CNS_EUC_H, &cmap_CNS_EUC_V,

  /* ETen / EUC / Ext */
  &cmap_ETen_B5_H, &cmap_ETen_B5_V,
  &cmap_ETenms_B5_H, &cmap_ETenms_B5_V,
  &cmap_EUC_H, &cmap_EUC_V,
  &cmap_Ext_RKSJ_H, &cmap_Ext_RKSJ_V,

  /* GB family */
  &cmap_GB_EUC_H, &cmap_GB_EUC_V,
  &cmap_GBK_EUC_H, &cmap_GBK_EUC_V,
  &cmap_GBK_X,
  &cmap_GBK2K_H, &cmap_GBK2K_V,
  &cmap_GBKp_EUC_H, &cmap_GBKp_EUC_V,
  &cmap_GBpc_EUC_H, &cmap_GBpc_EUC_V,

  /* H / HKscs / Identity */
  &cmap_H,
  &cmap_HKscs_B5_H, &cmap_HKscs_B5_V,
  &cmap_Identity_H, &cmap_Identity_V,

  /* KSC family */
  &cmap_KSC_EUC_H, &cmap_KSC_EUC_V,
  &cmap_KSCms_UHC_H,
  &cmap_KSCms_UHC_HW_H, &cmap_KSCms_UHC_HW_V,
  &cmap_KSCms_UHC_V,
  &cmap_KSCpc_EUC_H,

  /* UniCNS */
  &cmap_UniCNS_UCS2_H, &cmap_UniCNS_UCS2_V,
  &cmap_UniCNS_UTF16_H, &cmap_UniCNS_UTF16_V,
  &cmap_UniCNS_X,

  /* UniGB */
  &cmap_UniGB_UCS2_H, &cmap_UniGB_UCS2_V,
  &cmap_UniGB_UTF16_H, &cmap_UniGB_UTF16_V,
  &cmap_UniGB_X,

  /* UniJIS */
  &cmap_UniJIS_UCS2_H,
  &cmap_UniJIS_UCS2_HW_H, &cmap_UniJIS_UCS2_HW_V,
  &cmap_UniJIS_UCS2_V,
  &cmap_UniJIS_UTF16_H, &cmap_UniJIS_UTF16_V,
  &cmap_UniJIS_X,

  /* UniKS */
  &cmap_UniKS_UCS2_H, &cmap_UniKS_UCS2_V,
  &cmap_UniKS_UTF16_H, &cmap_UniKS_UTF16_V,
  &cmap_UniKS_X,

  /* V */
  &cmap_V,
};
270
271
pdf_cmap *
272
pdf_load_builtin_cmap(fz_context *ctx, const char *name)
273
3.13k
{
274
3.13k
  int r = nelem(table)-1;
275
3.13k
  int l = 0;
276
13.6k
  while (l <= r)
277
13.5k
  {
278
13.5k
    int m = (l + r) >> 1;
279
13.5k
    int c = strcmp(name, table[m]->cmap_name);
280
13.5k
    if (c < 0)
281
6.84k
      r = m - 1;
282
6.75k
    else if (c > 0)
283
3.64k
      l = m + 1;
284
3.11k
    else
285
3.11k
      return table[m];
286
13.5k
  }
287
19
  return NULL;
288
3.13k
}
289
290
#endif
291
292
pdf_cmap *
293
pdf_load_system_cmap(fz_context *ctx, const char *cmap_name)
294
3.13k
{
295
3.13k
  pdf_cmap *usecmap;
296
3.13k
  pdf_cmap *cmap;
297
298
3.13k
  cmap = pdf_load_builtin_cmap(ctx, cmap_name);
299
3.13k
  if (!cmap)
300
19
    fz_throw(ctx, FZ_ERROR_GENERIC, "no builtin cmap file: %s", cmap_name);
301
302
3.11k
  if (cmap->usecmap_name[0] && !cmap->usecmap)
303
7
  {
304
7
    usecmap = pdf_load_system_cmap(ctx, cmap->usecmap_name);
305
7
    if (!usecmap)
306
0
      fz_throw(ctx, FZ_ERROR_GENERIC, "no builtin cmap file: %s", cmap->usecmap_name);
307
7
    pdf_set_usecmap(ctx, cmap, usecmap);
308
7
  }
309
310
3.11k
  return cmap;
311
3.11k
}