/src/mupdf/source/pdf/pdf-cmap-load.c
Line | Count | Source |
1 | | // Copyright (C) 2004-2024 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | #include "mupdf/pdf.h" |
25 | | |
26 | | #include "cmaps/TrueType-UCS2.h" |
27 | | |
28 | | #include <string.h> |
29 | | |
30 | | static pdf_cmap * |
31 | | pdf_load_embedded_cmap_imp(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, pdf_cycle_list *cycle_up) |
32 | 1 | { |
33 | 1 | pdf_cycle_list cycle; |
34 | 1 | fz_stream *file = NULL; |
35 | 1 | pdf_cmap *cmap = NULL; |
36 | 1 | pdf_cmap *usecmap = NULL; |
37 | 1 | pdf_obj *obj; |
38 | | |
39 | 1 | fz_var(file); |
40 | 1 | fz_var(cmap); |
41 | 1 | fz_var(usecmap); |
42 | | |
43 | 1 | if ((cmap = pdf_find_item(ctx, pdf_drop_cmap_imp, stmobj)) != NULL) |
44 | 0 | return cmap; |
45 | | |
46 | 2 | fz_try(ctx) |
47 | 2 | { |
48 | 1 | file = pdf_open_stream(ctx, stmobj); |
49 | 1 | cmap = pdf_load_cmap(ctx, file); |
50 | | |
51 | 1 | obj = pdf_dict_get(ctx, stmobj, PDF_NAME(WMode)); |
52 | 1 | if (pdf_is_int(ctx, obj)) |
53 | 0 | pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(ctx, obj)); |
54 | | |
55 | 1 | obj = pdf_dict_get(ctx, stmobj, PDF_NAME(UseCMap)); |
56 | 1 | if (pdf_is_name(ctx, obj)) |
57 | 0 | { |
58 | 0 | fz_try(ctx) |
59 | 0 | { |
60 | 0 | usecmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, obj)); |
61 | 0 | pdf_set_usecmap(ctx, cmap, usecmap); |
62 | 0 | } |
63 | 0 | fz_catch(ctx) |
64 | 0 | { |
65 | 0 | fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); |
66 | 0 | fz_report_error(ctx); |
67 | 0 | fz_warn(ctx, "cannot load system CMap: %s", pdf_to_name(ctx, obj)); |
68 | 0 | } |
69 | 0 | } |
70 | 1 | else if (pdf_is_indirect(ctx, obj)) |
71 | 0 | { |
72 | 0 | if (pdf_cycle(ctx, &cycle, cycle_up, obj)) |
73 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "recursive CMap"); |
74 | 0 | usecmap = pdf_load_embedded_cmap_imp(ctx, doc, obj, &cycle); |
75 | 0 | pdf_set_usecmap(ctx, cmap, usecmap); |
76 | 0 | } |
77 | 1 | else if (strlen(cmap->usecmap_name) > 0) |
78 | 0 | { |
79 | 0 | fz_try(ctx) |
80 | 0 | { |
81 | 0 | usecmap = pdf_load_system_cmap(ctx, cmap->usecmap_name); |
82 | 0 | pdf_set_usecmap(ctx, cmap, usecmap); |
83 | 0 | } |
84 | 0 | fz_catch(ctx) |
85 | 0 | { |
86 | 0 | fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); |
87 | 0 | fz_report_error(ctx); |
88 | 0 | fz_warn(ctx, "cannot load system CMap: %s", pdf_to_name(ctx, obj)); |
89 | 0 | } |
90 | 0 | } |
91 | | |
92 | 1 | pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap)); |
93 | 1 | } |
94 | 2 | fz_always(ctx) |
95 | 1 | { |
96 | 1 | fz_drop_stream(ctx, file); |
97 | 1 | pdf_drop_cmap(ctx, usecmap); |
98 | 1 | } |
99 | 1 | fz_catch(ctx) |
100 | 0 | { |
101 | 0 | pdf_drop_cmap(ctx, cmap); |
102 | 0 | fz_rethrow(ctx); |
103 | 0 | } |
104 | | |
105 | 1 | return cmap; |
106 | 1 | } |
107 | | |
108 | | pdf_cmap * |
109 | | pdf_load_embedded_cmap(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj) |
110 | 1 | { |
111 | 1 | return pdf_load_embedded_cmap_imp(ctx, doc, stmobj, NULL); |
112 | 1 | } |
113 | | |
114 | | pdf_cmap * |
115 | | pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes) |
116 | 0 | { |
117 | 0 | pdf_cmap *cmap = pdf_new_cmap(ctx); |
118 | 0 | fz_try(ctx) |
119 | 0 | { |
120 | 0 | unsigned int high = (1 << (bytes * 8)) - 1; |
121 | 0 | if (wmode) |
122 | 0 | fz_strlcpy(cmap->cmap_name, "Identity-V", sizeof cmap->cmap_name); |
123 | 0 | else |
124 | 0 | fz_strlcpy(cmap->cmap_name, "Identity-H", sizeof cmap->cmap_name); |
125 | 0 | pdf_add_codespace(ctx, cmap, 0, high, bytes); |
126 | 0 | pdf_map_range_to_range(ctx, cmap, 0, high, 0); |
127 | 0 | pdf_sort_cmap(ctx, cmap); |
128 | 0 | pdf_set_cmap_wmode(ctx, cmap, wmode); |
129 | 0 | } |
130 | 0 | fz_catch(ctx) |
131 | 0 | { |
132 | 0 | pdf_drop_cmap(ctx, cmap); |
133 | 0 | fz_rethrow(ctx); |
134 | 0 | } |
135 | 0 | return cmap; |
136 | 0 | } |
137 | | |
138 | | #ifdef NO_CJK |
139 | | |
140 | | pdf_cmap * |
141 | | pdf_load_builtin_cmap(fz_context *ctx, const char *name) |
142 | | { |
143 | | if (!strcmp(name, "Identity-H")) return pdf_new_identity_cmap(ctx, 0, 2); |
144 | | if (!strcmp(name, "Identity-V")) return pdf_new_identity_cmap(ctx, 1, 2); |
145 | | if (!strcmp(name, "TrueType-UCS2")) return &cmap_TrueType_UCS2; |
146 | | return NULL; |
147 | | } |
148 | | |
149 | | #else |
150 | | |
151 | | /* To regenerate this list: :r !bash scripts/runcmapdump.sh */ |
152 | | |
153 | | #include "cmaps/83pv-RKSJ-H.h" |
154 | | #include "cmaps/90ms-RKSJ-H.h" |
155 | | #include "cmaps/90ms-RKSJ-V.h" |
156 | | #include "cmaps/90msp-RKSJ-H.h" |
157 | | #include "cmaps/90msp-RKSJ-V.h" |
158 | | #include "cmaps/90pv-RKSJ-H.h" |
159 | | #include "cmaps/Add-RKSJ-H.h" |
160 | | #include "cmaps/Add-RKSJ-V.h" |
161 | | #include "cmaps/Adobe-CNS1-UCS2.h" |
162 | | #include "cmaps/Adobe-GB1-UCS2.h" |
163 | | #include "cmaps/Adobe-Japan1-UCS2.h" |
164 | | #include "cmaps/Adobe-Korea1-UCS2.h" |
165 | | #include "cmaps/B5pc-H.h" |
166 | | #include "cmaps/B5pc-V.h" |
167 | | #include "cmaps/CNS-EUC-H.h" |
168 | | #include "cmaps/CNS-EUC-V.h" |
169 | | #include "cmaps/ETen-B5-H.h" |
170 | | #include "cmaps/ETen-B5-V.h" |
171 | | #include "cmaps/ETenms-B5-H.h" |
172 | | #include "cmaps/ETenms-B5-V.h" |
173 | | #include "cmaps/EUC-H.h" |
174 | | #include "cmaps/EUC-V.h" |
175 | | #include "cmaps/Ext-RKSJ-H.h" |
176 | | #include "cmaps/Ext-RKSJ-V.h" |
177 | | #include "cmaps/GB-EUC-H.h" |
178 | | #include "cmaps/GB-EUC-V.h" |
179 | | #include "cmaps/GBK-EUC-H.h" |
180 | | #include "cmaps/GBK-EUC-V.h" |
181 | | #include "cmaps/GBK-X.h" |
182 | | #include "cmaps/GBK2K-H.h" |
183 | | #include "cmaps/GBK2K-V.h" |
184 | | #include "cmaps/GBKp-EUC-H.h" |
185 | | #include "cmaps/GBKp-EUC-V.h" |
186 | | #include "cmaps/GBpc-EUC-H.h" |
187 | | #include "cmaps/GBpc-EUC-V.h" |
188 | | #include "cmaps/H.h" |
189 | | #include "cmaps/HKscs-B5-H.h" |
190 | | #include "cmaps/HKscs-B5-V.h" |
191 | | #include "cmaps/Identity-H.h" |
192 | | #include "cmaps/Identity-V.h" |
193 | | #include "cmaps/KSC-EUC-H.h" |
194 | | #include "cmaps/KSC-EUC-V.h" |
195 | | #include "cmaps/KSCms-UHC-H.h" |
196 | | #include "cmaps/KSCms-UHC-HW-H.h" |
197 | | #include "cmaps/KSCms-UHC-HW-V.h" |
198 | | #include "cmaps/KSCms-UHC-V.h" |
199 | | #include "cmaps/KSCpc-EUC-H.h" |
200 | | #include "cmaps/UniCNS-UCS2-H.h" |
201 | | #include "cmaps/UniCNS-UCS2-V.h" |
202 | | #include "cmaps/UniCNS-UTF16-H.h" |
203 | | #include "cmaps/UniCNS-UTF16-V.h" |
204 | | #include "cmaps/UniCNS-X.h" |
205 | | #include "cmaps/UniGB-UCS2-H.h" |
206 | | #include "cmaps/UniGB-UCS2-V.h" |
207 | | #include "cmaps/UniGB-UTF16-H.h" |
208 | | #include "cmaps/UniGB-UTF16-V.h" |
209 | | #include "cmaps/UniGB-X.h" |
210 | | #include "cmaps/UniJIS-UCS2-H.h" |
211 | | #include "cmaps/UniJIS-UCS2-HW-H.h" |
212 | | #include "cmaps/UniJIS-UCS2-HW-V.h" |
213 | | #include "cmaps/UniJIS-UCS2-V.h" |
214 | | #include "cmaps/UniJIS-UTF16-H.h" |
215 | | #include "cmaps/UniJIS-UTF16-V.h" |
216 | | #include "cmaps/UniJIS-X.h" |
217 | | #include "cmaps/UniKS-UCS2-H.h" |
218 | | #include "cmaps/UniKS-UCS2-V.h" |
219 | | #include "cmaps/UniKS-UTF16-H.h" |
220 | | #include "cmaps/UniKS-UTF16-V.h" |
221 | | #include "cmaps/UniKS-X.h" |
222 | | #include "cmaps/V.h" |
223 | | |
224 | | static pdf_cmap *table[] = { |
225 | | &cmap_83pv_RKSJ_H, |
226 | | &cmap_90ms_RKSJ_H, |
227 | | &cmap_90ms_RKSJ_V, |
228 | | &cmap_90msp_RKSJ_H, |
229 | | &cmap_90msp_RKSJ_V, |
230 | | &cmap_90pv_RKSJ_H, |
231 | | &cmap_Add_RKSJ_H, |
232 | | &cmap_Add_RKSJ_V, |
233 | | &cmap_Adobe_CNS1_UCS2, |
234 | | &cmap_Adobe_GB1_UCS2, |
235 | | &cmap_Adobe_Japan1_UCS2, |
236 | | &cmap_Adobe_Korea1_UCS2, |
237 | | &cmap_B5pc_H, |
238 | | &cmap_B5pc_V, |
239 | | &cmap_CNS_EUC_H, |
240 | | &cmap_CNS_EUC_V, |
241 | | &cmap_ETen_B5_H, |
242 | | &cmap_ETen_B5_V, |
243 | | &cmap_ETenms_B5_H, |
244 | | &cmap_ETenms_B5_V, |
245 | | &cmap_EUC_H, |
246 | | &cmap_EUC_V, |
247 | | &cmap_Ext_RKSJ_H, |
248 | | &cmap_Ext_RKSJ_V, |
249 | | &cmap_GB_EUC_H, |
250 | | &cmap_GB_EUC_V, |
251 | | &cmap_GBK_EUC_H, |
252 | | &cmap_GBK_EUC_V, |
253 | | &cmap_GBK_X, |
254 | | &cmap_GBK2K_H, |
255 | | &cmap_GBK2K_V, |
256 | | &cmap_GBKp_EUC_H, |
257 | | &cmap_GBKp_EUC_V, |
258 | | &cmap_GBpc_EUC_H, |
259 | | &cmap_GBpc_EUC_V, |
260 | | &cmap_H, |
261 | | &cmap_HKscs_B5_H, |
262 | | &cmap_HKscs_B5_V, |
263 | | &cmap_Identity_H, |
264 | | &cmap_Identity_V, |
265 | | &cmap_KSC_EUC_H, |
266 | | &cmap_KSC_EUC_V, |
267 | | &cmap_KSCms_UHC_H, |
268 | | &cmap_KSCms_UHC_HW_H, |
269 | | &cmap_KSCms_UHC_HW_V, |
270 | | &cmap_KSCms_UHC_V, |
271 | | &cmap_KSCpc_EUC_H, |
272 | | &cmap_TrueType_UCS2, |
273 | | &cmap_UniCNS_UCS2_H, |
274 | | &cmap_UniCNS_UCS2_V, |
275 | | &cmap_UniCNS_UTF16_H, |
276 | | &cmap_UniCNS_UTF16_V, |
277 | | &cmap_UniCNS_X, |
278 | | &cmap_UniGB_UCS2_H, |
279 | | &cmap_UniGB_UCS2_V, |
280 | | &cmap_UniGB_UTF16_H, |
281 | | &cmap_UniGB_UTF16_V, |
282 | | &cmap_UniGB_X, |
283 | | &cmap_UniJIS_UCS2_H, |
284 | | &cmap_UniJIS_UCS2_HW_H, |
285 | | &cmap_UniJIS_UCS2_HW_V, |
286 | | &cmap_UniJIS_UCS2_V, |
287 | | &cmap_UniJIS_UTF16_H, |
288 | | &cmap_UniJIS_UTF16_V, |
289 | | &cmap_UniJIS_X, |
290 | | &cmap_UniKS_UCS2_H, |
291 | | &cmap_UniKS_UCS2_V, |
292 | | &cmap_UniKS_UTF16_H, |
293 | | &cmap_UniKS_UTF16_V, |
294 | | &cmap_UniKS_X, |
295 | | &cmap_V, |
296 | | }; |
297 | | |
298 | | pdf_cmap * |
299 | | pdf_load_builtin_cmap(fz_context *ctx, const char *name) |
300 | 1 | { |
301 | 1 | int r = nelem(table)-1; |
302 | 1 | int l = 0; |
303 | 6 | while (l <= r) |
304 | 6 | { |
305 | 6 | int m = (l + r) >> 1; |
306 | 6 | int c = strcmp(name, table[m]->cmap_name); |
307 | 6 | if (c < 0) |
308 | 3 | r = m - 1; |
309 | 3 | else if (c > 0) |
310 | 2 | l = m + 1; |
311 | 1 | else |
312 | 1 | return table[m]; |
313 | 6 | } |
314 | 0 | return NULL; |
315 | 1 | } |
316 | | |
317 | | #endif |
318 | | |
319 | | pdf_cmap * |
320 | | pdf_load_system_cmap(fz_context *ctx, const char *cmap_name) |
321 | 1 | { |
322 | 1 | pdf_cmap *usecmap; |
323 | 1 | pdf_cmap *cmap; |
324 | | |
325 | 1 | cmap = pdf_load_builtin_cmap(ctx, cmap_name); |
326 | 1 | if (!cmap) |
327 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "no builtin cmap file: %s", cmap_name); |
328 | | |
329 | 1 | if (cmap->usecmap_name[0] && !cmap->usecmap) |
330 | 0 | { |
331 | 0 | usecmap = pdf_load_system_cmap(ctx, cmap->usecmap_name); |
332 | 0 | if (!usecmap) |
333 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "no builtin cmap file: %s", cmap->usecmap_name); |
334 | 0 | pdf_set_usecmap(ctx, cmap, usecmap); |
335 | 0 | } |
336 | | |
337 | 1 | return cmap; |
338 | 1 | } |