/src/mupdf/source/pdf/pdf-cmap-load.c
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (C) 2004-2021 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | #include "mupdf/pdf.h" |
25 | | |
26 | | #include <string.h> |
27 | | |
28 | | static pdf_cmap * |
29 | | pdf_load_embedded_cmap_imp(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, pdf_cycle_list *cycle_up) |
30 | 2.89k | { |
31 | 2.89k | pdf_cycle_list cycle; |
32 | 2.89k | fz_stream *file = NULL; |
33 | 2.89k | pdf_cmap *cmap = NULL; |
34 | 2.89k | pdf_cmap *usecmap = NULL; |
35 | 2.89k | pdf_obj *obj; |
36 | | |
37 | 2.89k | fz_var(file); |
38 | 2.89k | fz_var(cmap); |
39 | 2.89k | fz_var(usecmap); |
40 | | |
41 | 2.89k | if ((cmap = pdf_find_item(ctx, pdf_drop_cmap_imp, stmobj)) != NULL) |
42 | 37 | return cmap; |
43 | | |
44 | 5.70k | fz_try(ctx) |
45 | 5.70k | { |
46 | 2.85k | file = pdf_open_stream(ctx, stmobj); |
47 | 2.85k | cmap = pdf_load_cmap(ctx, file); |
48 | | |
49 | 2.85k | obj = pdf_dict_get(ctx, stmobj, PDF_NAME(WMode)); |
50 | 2.85k | if (pdf_is_int(ctx, obj)) |
51 | 97 | pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(ctx, obj)); |
52 | | |
53 | 2.85k | obj = pdf_dict_get(ctx, stmobj, PDF_NAME(UseCMap)); |
54 | 2.85k | if (pdf_is_name(ctx, obj)) |
55 | 0 | { |
56 | 0 | usecmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, obj)); |
57 | 0 | pdf_set_usecmap(ctx, cmap, usecmap); |
58 | 0 | } |
59 | 2.85k | else if (pdf_is_indirect(ctx, obj)) |
60 | 0 | { |
61 | 0 | if (pdf_cycle(ctx, &cycle, cycle_up, obj)) |
62 | 0 | fz_throw(ctx, FZ_ERROR_GENERIC, "recursive CMap"); |
63 | 0 | usecmap = pdf_load_embedded_cmap_imp(ctx, doc, obj, &cycle); |
64 | 0 | pdf_set_usecmap(ctx, cmap, usecmap); |
65 | 0 | } |
66 | | |
67 | 2.85k | pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap)); |
68 | 2.85k | } |
69 | 5.70k | fz_always(ctx) |
70 | 2.85k | { |
71 | 2.85k | fz_drop_stream(ctx, file); |
72 | 2.85k | pdf_drop_cmap(ctx, usecmap); |
73 | 2.85k | } |
74 | 2.85k | fz_catch(ctx) |
75 | 3 | { |
76 | 3 | pdf_drop_cmap(ctx, cmap); |
77 | 3 | fz_rethrow(ctx); |
78 | 3 | } |
79 | | |
80 | 2.85k | return cmap; |
81 | 2.85k | } |
82 | | |
83 | | pdf_cmap * |
84 | | pdf_load_embedded_cmap(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj) |
85 | 2.89k | { |
86 | 2.89k | return pdf_load_embedded_cmap_imp(ctx, doc, stmobj, NULL); |
87 | 2.89k | } |
88 | | |
89 | | pdf_cmap * |
90 | | pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes) |
91 | 11.5k | { |
92 | 11.5k | pdf_cmap *cmap = pdf_new_cmap(ctx); |
93 | 23.1k | fz_try(ctx) |
94 | 23.1k | { |
95 | 11.5k | unsigned int high = (1 << (bytes * 8)) - 1; |
96 | 11.5k | if (wmode) |
97 | 0 | fz_strlcpy(cmap->cmap_name, "Identity-V", sizeof cmap->cmap_name); |
98 | 11.5k | else |
99 | 11.5k | fz_strlcpy(cmap->cmap_name, "Identity-H", sizeof cmap->cmap_name); |
100 | 11.5k | pdf_add_codespace(ctx, cmap, 0, high, bytes); |
101 | 11.5k | pdf_map_range_to_range(ctx, cmap, 0, high, 0); |
102 | 11.5k | pdf_sort_cmap(ctx, cmap); |
103 | 11.5k | pdf_set_cmap_wmode(ctx, cmap, wmode); |
104 | 11.5k | } |
105 | 23.1k | fz_catch(ctx) |
106 | 0 | { |
107 | 0 | pdf_drop_cmap(ctx, cmap); |
108 | 0 | fz_rethrow(ctx); |
109 | 0 | } |
110 | 11.5k | return cmap; |
111 | 11.5k | } |
112 | | |
113 | | #ifdef NO_CJK |
114 | | |
115 | | pdf_cmap * |
116 | | pdf_load_builtin_cmap(fz_context *ctx, const char *name) |
117 | | { |
118 | | if (!strcmp(name, "Identity-H")) return pdf_new_identity_cmap(ctx, 0, 2); |
119 | | if (!strcmp(name, "Identity-V")) return pdf_new_identity_cmap(ctx, 1, 2); |
120 | | return NULL; |
121 | | } |
122 | | |
123 | | #else |
124 | | |
125 | | /* To regenerate this list: :r !bash scripts/runcmapdump.sh */ |
126 | | |
127 | | #include "cmaps/83pv-RKSJ-H.h" |
128 | | #include "cmaps/90ms-RKSJ-H.h" |
129 | | #include "cmaps/90ms-RKSJ-V.h" |
130 | | #include "cmaps/90msp-RKSJ-H.h" |
131 | | #include "cmaps/90msp-RKSJ-V.h" |
132 | | #include "cmaps/90pv-RKSJ-H.h" |
133 | | #include "cmaps/Add-RKSJ-H.h" |
134 | | #include "cmaps/Add-RKSJ-V.h" |
135 | | #include "cmaps/Adobe-CNS1-UCS2.h" |
136 | | #include "cmaps/Adobe-GB1-UCS2.h" |
137 | | #include "cmaps/Adobe-Japan1-UCS2.h" |
138 | | #include "cmaps/Adobe-Korea1-UCS2.h" |
139 | | #include "cmaps/B5pc-H.h" |
140 | | #include "cmaps/B5pc-V.h" |
141 | | #include "cmaps/CNS-EUC-H.h" |
142 | | #include "cmaps/CNS-EUC-V.h" |
143 | | #include "cmaps/ETen-B5-H.h" |
144 | | #include "cmaps/ETen-B5-V.h" |
145 | | #include "cmaps/ETenms-B5-H.h" |
146 | | #include "cmaps/ETenms-B5-V.h" |
147 | | #include "cmaps/EUC-H.h" |
148 | | #include "cmaps/EUC-V.h" |
149 | | #include "cmaps/Ext-RKSJ-H.h" |
150 | | #include "cmaps/Ext-RKSJ-V.h" |
151 | | #include "cmaps/GB-EUC-H.h" |
152 | | #include "cmaps/GB-EUC-V.h" |
153 | | #include "cmaps/GBK-EUC-H.h" |
154 | | #include "cmaps/GBK-EUC-V.h" |
155 | | #include "cmaps/GBK-X.h" |
156 | | #include "cmaps/GBK2K-H.h" |
157 | | #include "cmaps/GBK2K-V.h" |
158 | | #include "cmaps/GBKp-EUC-H.h" |
159 | | #include "cmaps/GBKp-EUC-V.h" |
160 | | #include "cmaps/GBpc-EUC-H.h" |
161 | | #include "cmaps/GBpc-EUC-V.h" |
162 | | #include "cmaps/H.h" |
163 | | #include "cmaps/HKscs-B5-H.h" |
164 | | #include "cmaps/HKscs-B5-V.h" |
165 | | #include "cmaps/Identity-H.h" |
166 | | #include "cmaps/Identity-V.h" |
167 | | #include "cmaps/KSC-EUC-H.h" |
168 | | #include "cmaps/KSC-EUC-V.h" |
169 | | #include "cmaps/KSCms-UHC-H.h" |
170 | | #include "cmaps/KSCms-UHC-HW-H.h" |
171 | | #include "cmaps/KSCms-UHC-HW-V.h" |
172 | | #include "cmaps/KSCms-UHC-V.h" |
173 | | #include "cmaps/KSCpc-EUC-H.h" |
174 | | #include "cmaps/UniCNS-UCS2-H.h" |
175 | | #include "cmaps/UniCNS-UCS2-V.h" |
176 | | #include "cmaps/UniCNS-UTF16-H.h" |
177 | | #include "cmaps/UniCNS-UTF16-V.h" |
178 | | #include "cmaps/UniCNS-X.h" |
179 | | #include "cmaps/UniGB-UCS2-H.h" |
180 | | #include "cmaps/UniGB-UCS2-V.h" |
181 | | #include "cmaps/UniGB-UTF16-H.h" |
182 | | #include "cmaps/UniGB-UTF16-V.h" |
183 | | #include "cmaps/UniGB-X.h" |
184 | | #include "cmaps/UniJIS-UCS2-H.h" |
185 | | #include "cmaps/UniJIS-UCS2-HW-H.h" |
186 | | #include "cmaps/UniJIS-UCS2-HW-V.h" |
187 | | #include "cmaps/UniJIS-UCS2-V.h" |
188 | | #include "cmaps/UniJIS-UTF16-H.h" |
189 | | #include "cmaps/UniJIS-UTF16-V.h" |
190 | | #include "cmaps/UniJIS-X.h" |
191 | | #include "cmaps/UniKS-UCS2-H.h" |
192 | | #include "cmaps/UniKS-UCS2-V.h" |
193 | | #include "cmaps/UniKS-UTF16-H.h" |
194 | | #include "cmaps/UniKS-UTF16-V.h" |
195 | | #include "cmaps/UniKS-X.h" |
196 | | #include "cmaps/V.h" |
197 | | |
198 | | static pdf_cmap *table[] = { |
199 | | &cmap_83pv_RKSJ_H, |
200 | | &cmap_90ms_RKSJ_H, |
201 | | &cmap_90ms_RKSJ_V, |
202 | | &cmap_90msp_RKSJ_H, |
203 | | &cmap_90msp_RKSJ_V, |
204 | | &cmap_90pv_RKSJ_H, |
205 | | &cmap_Add_RKSJ_H, |
206 | | &cmap_Add_RKSJ_V, |
207 | | &cmap_Adobe_CNS1_UCS2, |
208 | | &cmap_Adobe_GB1_UCS2, |
209 | | &cmap_Adobe_Japan1_UCS2, |
210 | | &cmap_Adobe_Korea1_UCS2, |
211 | | &cmap_B5pc_H, |
212 | | &cmap_B5pc_V, |
213 | | &cmap_CNS_EUC_H, |
214 | | &cmap_CNS_EUC_V, |
215 | | &cmap_ETen_B5_H, |
216 | | &cmap_ETen_B5_V, |
217 | | &cmap_ETenms_B5_H, |
218 | | &cmap_ETenms_B5_V, |
219 | | &cmap_EUC_H, |
220 | | &cmap_EUC_V, |
221 | | &cmap_Ext_RKSJ_H, |
222 | | &cmap_Ext_RKSJ_V, |
223 | | &cmap_GB_EUC_H, |
224 | | &cmap_GB_EUC_V, |
225 | | &cmap_GBK_EUC_H, |
226 | | &cmap_GBK_EUC_V, |
227 | | &cmap_GBK_X, |
228 | | &cmap_GBK2K_H, |
229 | | &cmap_GBK2K_V, |
230 | | &cmap_GBKp_EUC_H, |
231 | | &cmap_GBKp_EUC_V, |
232 | | &cmap_GBpc_EUC_H, |
233 | | &cmap_GBpc_EUC_V, |
234 | | &cmap_H, |
235 | | &cmap_HKscs_B5_H, |
236 | | &cmap_HKscs_B5_V, |
237 | | &cmap_Identity_H, |
238 | | &cmap_Identity_V, |
239 | | &cmap_KSC_EUC_H, |
240 | | &cmap_KSC_EUC_V, |
241 | | &cmap_KSCms_UHC_H, |
242 | | &cmap_KSCms_UHC_HW_H, |
243 | | &cmap_KSCms_UHC_HW_V, |
244 | | &cmap_KSCms_UHC_V, |
245 | | &cmap_KSCpc_EUC_H, |
246 | | &cmap_UniCNS_UCS2_H, |
247 | | &cmap_UniCNS_UCS2_V, |
248 | | &cmap_UniCNS_UTF16_H, |
249 | | &cmap_UniCNS_UTF16_V, |
250 | | &cmap_UniCNS_X, |
251 | | &cmap_UniGB_UCS2_H, |
252 | | &cmap_UniGB_UCS2_V, |
253 | | &cmap_UniGB_UTF16_H, |
254 | | &cmap_UniGB_UTF16_V, |
255 | | &cmap_UniGB_X, |
256 | | &cmap_UniJIS_UCS2_H, |
257 | | &cmap_UniJIS_UCS2_HW_H, |
258 | | &cmap_UniJIS_UCS2_HW_V, |
259 | | &cmap_UniJIS_UCS2_V, |
260 | | &cmap_UniJIS_UTF16_H, |
261 | | &cmap_UniJIS_UTF16_V, |
262 | | &cmap_UniJIS_X, |
263 | | &cmap_UniKS_UCS2_H, |
264 | | &cmap_UniKS_UCS2_V, |
265 | | &cmap_UniKS_UTF16_H, |
266 | | &cmap_UniKS_UTF16_V, |
267 | | &cmap_UniKS_X, |
268 | | &cmap_V, |
269 | | }; |
270 | | |
271 | | pdf_cmap * |
272 | | pdf_load_builtin_cmap(fz_context *ctx, const char *name) |
273 | 3.13k | { |
274 | 3.13k | int r = nelem(table)-1; |
275 | 3.13k | int l = 0; |
276 | 13.6k | while (l <= r) |
277 | 13.5k | { |
278 | 13.5k | int m = (l + r) >> 1; |
279 | 13.5k | int c = strcmp(name, table[m]->cmap_name); |
280 | 13.5k | if (c < 0) |
281 | 6.84k | r = m - 1; |
282 | 6.75k | else if (c > 0) |
283 | 3.64k | l = m + 1; |
284 | 3.11k | else |
285 | 3.11k | return table[m]; |
286 | 13.5k | } |
287 | 19 | return NULL; |
288 | 3.13k | } |
289 | | |
290 | | #endif |
291 | | |
292 | | pdf_cmap * |
293 | | pdf_load_system_cmap(fz_context *ctx, const char *cmap_name) |
294 | 3.13k | { |
295 | 3.13k | pdf_cmap *usecmap; |
296 | 3.13k | pdf_cmap *cmap; |
297 | | |
298 | 3.13k | cmap = pdf_load_builtin_cmap(ctx, cmap_name); |
299 | 3.13k | if (!cmap) |
300 | 19 | fz_throw(ctx, FZ_ERROR_GENERIC, "no builtin cmap file: %s", cmap_name); |
301 | | |
302 | 3.11k | if (cmap->usecmap_name[0] && !cmap->usecmap) |
303 | 7 | { |
304 | 7 | usecmap = pdf_load_system_cmap(ctx, cmap->usecmap_name); |
305 | 7 | if (!usecmap) |
306 | 0 | fz_throw(ctx, FZ_ERROR_GENERIC, "no builtin cmap file: %s", cmap->usecmap_name); |
307 | 7 | pdf_set_usecmap(ctx, cmap, usecmap); |
308 | 7 | } |
309 | | |
310 | 3.11k | return cmap; |
311 | 3.11k | } |