/src/qpdf/libqpdf/QUtil.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Include qpdf-config.h first so off_t is guaranteed to have the right size. |
2 | | #include <qpdf/qpdf-config.h> |
3 | | |
4 | | #include <qpdf/QUtil.hh> |
5 | | #include <qpdf/Util.hh> |
6 | | |
7 | | #include <qpdf/CryptoRandomDataProvider.hh> |
8 | | #include <qpdf/Pipeline.hh> |
9 | | #include <qpdf/QIntC.hh> |
10 | | #include <qpdf/QPDFSystemError.hh> |
11 | | #include <qpdf/QTC.hh> |
12 | | #include <qpdf/Util.hh> |
13 | | |
14 | | #include <cerrno> |
15 | | #include <cstdlib> |
16 | | #include <cstring> |
17 | | #include <fcntl.h> |
18 | | #include <fstream> |
19 | | #include <iomanip> |
20 | | #include <map> |
21 | | #include <memory> |
22 | | #include <regex> |
23 | | #include <set> |
24 | | #include <sstream> |
25 | | #include <stdexcept> |
26 | | #ifndef QPDF_NO_WCHAR_T |
27 | | # include <cwchar> |
28 | | #endif |
29 | | #ifdef _WIN32 |
30 | | # define WIN32_LEAN_AND_MEAN |
31 | | # include <direct.h> |
32 | | # include <io.h> |
33 | | # include <windows.h> |
34 | | #else |
35 | | # include <sys/stat.h> |
36 | | # include <unistd.h> |
37 | | #endif |
38 | | #ifdef HAVE_MALLOC_INFO |
39 | | # include <malloc.h> |
40 | | #endif |
41 | | |
42 | | using namespace qpdf; |
43 | | |
// Unicode values for bytes 0x18 through 0x1f. Index 0 corresponds to byte 24 (0x18), so look up
// with [byte - 24].
static unsigned short pdf_doc_low_to_unicode[] = {
    /* 0x18 */ 0x02d8, // BREVE
    /* 0x19 */ 0x02c7, // CARON
    /* 0x1a */ 0x02c6, // MODIFIER LETTER CIRCUMFLEX ACCENT
    /* 0x1b */ 0x02d9, // DOT ABOVE
    /* 0x1c */ 0x02dd, // DOUBLE ACUTE ACCENT
    /* 0x1d */ 0x02db, // OGONEK
    /* 0x1e */ 0x02da, // RING ABOVE
    /* 0x1f */ 0x02dc, // SMALL TILDE
};
// Unicode values for bytes 0x7f through 0xa0. Index 0 corresponds to byte 127 (0x7f), so look up
// with [byte - 127]. Bytes marked UNDEFINED map to U+FFFD.
static unsigned short pdf_doc_to_unicode[] = {
    /* 0x7f */ 0xfffd, // UNDEFINED
    /* 0x80 */ 0x2022, // BULLET
    /* 0x81 */ 0x2020, // DAGGER
    /* 0x82 */ 0x2021, // DOUBLE DAGGER
    /* 0x83 */ 0x2026, // HORIZONTAL ELLIPSIS
    /* 0x84 */ 0x2014, // EM DASH
    /* 0x85 */ 0x2013, // EN DASH
    /* 0x86 */ 0x0192, // SMALL LETTER F WITH HOOK
    /* 0x87 */ 0x2044, // FRACTION SLASH (solidus)
    /* 0x88 */ 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    /* 0x89 */ 0x203a, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    /* 0x8a */ 0x2212, // MINUS SIGN
    /* 0x8b */ 0x2030, // PER MILLE SIGN
    /* 0x8c */ 0x201e, // DOUBLE LOW-9 QUOTATION MARK (quotedblbase)
    /* 0x8d */ 0x201c, // LEFT DOUBLE QUOTATION MARK (double quote left)
    /* 0x8e */ 0x201d, // RIGHT DOUBLE QUOTATION MARK (quotedblright)
    /* 0x8f */ 0x2018, // LEFT SINGLE QUOTATION MARK (quoteleft)
    /* 0x90 */ 0x2019, // RIGHT SINGLE QUOTATION MARK (quoteright)
    /* 0x91 */ 0x201a, // SINGLE LOW-9 QUOTATION MARK (quotesinglbase)
    /* 0x92 */ 0x2122, // TRADE MARK SIGN
    /* 0x93 */ 0xfb01, // LATIN SMALL LIGATURE FI
    /* 0x94 */ 0xfb02, // LATIN SMALL LIGATURE FL
    /* 0x95 */ 0x0141, // LATIN CAPITAL LETTER L WITH STROKE
    /* 0x96 */ 0x0152, // LATIN CAPITAL LIGATURE OE
    /* 0x97 */ 0x0160, // LATIN CAPITAL LETTER S WITH CARON
    /* 0x98 */ 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS
    /* 0x99 */ 0x017d, // LATIN CAPITAL LETTER Z WITH CARON
    /* 0x9a */ 0x0131, // LATIN SMALL LETTER DOTLESS I
    /* 0x9b */ 0x0142, // LATIN SMALL LETTER L WITH STROKE
    /* 0x9c */ 0x0153, // LATIN SMALL LIGATURE OE
    /* 0x9d */ 0x0161, // LATIN SMALL LETTER S WITH CARON
    /* 0x9e */ 0x017e, // LATIN SMALL LETTER Z WITH CARON
    /* 0x9f */ 0xfffd, // UNDEFINED
    /* 0xa0 */ 0x20ac, // EURO SIGN
};
// Unicode values for bytes 0x80 through 0xa0. Index 0 corresponds to byte 0x80, so look up with
// [byte - 0x80]. Entries equal to 0xfffd have no assigned character.
static unsigned short win_ansi_to_unicode[] = {
    0x20ac, 0xfffd, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, // 0x80 - 0x87
    0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0xfffd, 0x017d, 0xfffd, // 0x88 - 0x8f
    0xfffd, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, // 0x90 - 0x97
    0x0303, 0x2122, 0x0161, 0x203a, 0x0153, 0xfffd, 0x017e, 0x0178, // 0x98 - 0x9f
    0x00a0,                                                         // 0xa0
};
// Unicode values for bytes 0x80 through 0xff. Index 0 corresponds to byte 0x80, so look up with
// [byte - 0x80]. Entries equal to 0xfffd have no assigned character.
static unsigned short mac_roman_to_unicode[] = {
    0x00c4, 0x00c5, 0x00c7, 0x00c9, 0x00d1, 0x00d6, 0x00dc, 0x00e1, // 0x80 - 0x87
    0x00e0, 0x00e2, 0x00e4, 0x00e3, 0x00e5, 0x00e7, 0x00e9, 0x00e8, // 0x88 - 0x8f
    0x00ea, 0x00eb, 0x00ed, 0x00ec, 0x00ee, 0x00ef, 0x00f1, 0x00f3, // 0x90 - 0x97
    0x00f2, 0x00f4, 0x00f6, 0x00f5, 0x00fa, 0x00f9, 0x00fb, 0x00fc, // 0x98 - 0x9f
    0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df, // 0xa0 - 0xa7
    0x00ae, 0x00a9, 0x2122, 0x0301, 0x0308, 0xfffd, 0x00c6, 0x00d8, // 0xa8 - 0xaf
    0xfffd, 0x00b1, 0xfffd, 0xfffd, 0x00a5, 0x03bc, 0xfffd, 0xfffd, // 0xb0 - 0xb7
    0xfffd, 0xfffd, 0xfffd, 0x1d43, 0x1d52, 0xfffd, 0x00e6, 0x00f8, // 0xb8 - 0xbf
    0x00bf, 0x00a1, 0x00ac, 0xfffd, 0x0192, 0xfffd, 0xfffd, 0x00ab, // 0xc0 - 0xc7
    0x00bb, 0x2026, 0xfffd, 0x00c0, 0x00c3, 0x00d5, 0x0152, 0x0153, // 0xc8 - 0xcf
    0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0xfffd, // 0xd0 - 0xd7
    0x00ff, 0x0178, 0x2044, 0x00a4, 0x2039, 0x203a, 0xfb01, 0xfb02, // 0xd8 - 0xdf
    0x2021, 0x00b7, 0x201a, 0x201e, 0x2030, 0x00c2, 0x00ca, 0x00c1, // 0xe0 - 0xe7
    0x00cb, 0x00c8, 0x00cd, 0x00ce, 0x00cf, 0x00cc, 0x00d3, 0x00d4, // 0xe8 - 0xef
    0xfffd, 0x00d2, 0x00da, 0x00db, 0x00d9, 0x0131, 0x02c6, 0x0303, // 0xf0 - 0xf7
    0x0304, 0x0306, 0x0307, 0x030a, 0x0327, 0x030b, 0x0328, 0x02c7, // 0xf8 - 0xff
};
257 | | |
// Reverse of win_ansi_to_unicode: Unicode code point -> byte in the range 0x80 - 0xa0. Bytes whose
// forward mapping is 0xfffd have no entry here.
static std::map<unsigned long, unsigned char> unicode_to_win_ansi = {
    {0x20ac, 0x80}, {0x201a, 0x82}, {0x0192, 0x83}, {0x201e, 0x84},
    {0x2026, 0x85}, {0x2020, 0x86}, {0x2021, 0x87}, {0x02c6, 0x88},
    {0x2030, 0x89}, {0x0160, 0x8a}, {0x2039, 0x8b}, {0x0152, 0x8c},
    {0x017d, 0x8e}, {0x2018, 0x91}, {0x2019, 0x92}, {0x201c, 0x93},
    {0x201d, 0x94}, {0x2022, 0x95}, {0x2013, 0x96}, {0x2014, 0x97},
    {0x0303, 0x98}, {0x2122, 0x99}, {0x0161, 0x9a}, {0x203a, 0x9b},
    {0x0153, 0x9c}, {0x017e, 0x9e}, {0x0178, 0x9f}, {0x00a0, 0xa0},
};
// Reverse of mac_roman_to_unicode: Unicode code point -> byte in the range 0x80 - 0xff. Bytes
// whose forward mapping is 0xfffd have no entry here.
static std::map<unsigned long, unsigned char> unicode_to_mac_roman = {
    {0x00c4, 0x80}, {0x00c5, 0x81}, {0x00c7, 0x82}, {0x00c9, 0x83},
    {0x00d1, 0x84}, {0x00d6, 0x85}, {0x00dc, 0x86}, {0x00e1, 0x87},
    {0x00e0, 0x88}, {0x00e2, 0x89}, {0x00e4, 0x8a}, {0x00e3, 0x8b},
    {0x00e5, 0x8c}, {0x00e7, 0x8d}, {0x00e9, 0x8e}, {0x00e8, 0x8f},
    {0x00ea, 0x90}, {0x00eb, 0x91}, {0x00ed, 0x92}, {0x00ec, 0x93},
    {0x00ee, 0x94}, {0x00ef, 0x95}, {0x00f1, 0x96}, {0x00f3, 0x97},
    {0x00f2, 0x98}, {0x00f4, 0x99}, {0x00f6, 0x9a}, {0x00f5, 0x9b},
    {0x00fa, 0x9c}, {0x00f9, 0x9d}, {0x00fb, 0x9e}, {0x00fc, 0x9f},
    {0x2020, 0xa0}, {0x00b0, 0xa1}, {0x00a2, 0xa2}, {0x00a3, 0xa3},
    {0x00a7, 0xa4}, {0x2022, 0xa5}, {0x00b6, 0xa6}, {0x00df, 0xa7},
    {0x00ae, 0xa8}, {0x00a9, 0xa9}, {0x2122, 0xaa}, {0x0301, 0xab},
    {0x0308, 0xac}, {0x00c6, 0xae}, {0x00d8, 0xaf}, {0x00b1, 0xb1},
    {0x00a5, 0xb4}, {0x03bc, 0xb5}, {0x1d43, 0xbb}, {0x1d52, 0xbc},
    {0x00e6, 0xbe}, {0x00f8, 0xbf}, {0x00bf, 0xc0}, {0x00a1, 0xc1},
    {0x00ac, 0xc2}, {0x0192, 0xc4}, {0x00ab, 0xc7}, {0x00bb, 0xc8},
    {0x2026, 0xc9}, {0x00c0, 0xcb}, {0x00c3, 0xcc}, {0x00d5, 0xcd},
    {0x0152, 0xce}, {0x0153, 0xcf}, {0x2013, 0xd0}, {0x2014, 0xd1},
    {0x201c, 0xd2}, {0x201d, 0xd3}, {0x2018, 0xd4}, {0x2019, 0xd5},
    {0x00f7, 0xd6}, {0x00ff, 0xd8}, {0x0178, 0xd9}, {0x2044, 0xda},
    {0x00a4, 0xdb}, {0x2039, 0xdc}, {0x203a, 0xdd}, {0xfb01, 0xde},
    {0xfb02, 0xdf}, {0x2021, 0xe0}, {0x00b7, 0xe1}, {0x201a, 0xe2},
    {0x201e, 0xe3}, {0x2030, 0xe4}, {0x00c2, 0xe5}, {0x00ca, 0xe6},
    {0x00c1, 0xe7}, {0x00cb, 0xe8}, {0x00c8, 0xe9}, {0x00cd, 0xea},
    {0x00ce, 0xeb}, {0x00cf, 0xec}, {0x00cc, 0xed}, {0x00d3, 0xee},
    {0x00d4, 0xef}, {0x00d2, 0xf1}, {0x00da, 0xf2}, {0x00db, 0xf3},
    {0x00d9, 0xf4}, {0x0131, 0xf5}, {0x02c6, 0xf6}, {0x0303, 0xf7},
    {0x0304, 0xf8}, {0x0306, 0xf9}, {0x0307, 0xfa}, {0x030a, 0xfb},
    {0x0327, 0xfc}, {0x030b, 0xfd}, {0x0328, 0xfe}, {0x02c7, 0xff},
};
// Reverse of pdf_doc_low_to_unicode and pdf_doc_to_unicode: Unicode code point -> byte.
static std::map<unsigned long, unsigned char> unicode_to_pdf_doc = {
    // Bytes 0x18 - 0x1f
    {0x02d8, 0x18}, {0x02c7, 0x19}, {0x02c6, 0x1a}, {0x02d9, 0x1b},
    {0x02dd, 0x1c}, {0x02db, 0x1d}, {0x02da, 0x1e}, {0x02dc, 0x1f},
    // Bytes 0x80 - 0xa0
    {0x2022, 0x80}, {0x2020, 0x81}, {0x2021, 0x82}, {0x2026, 0x83},
    {0x2014, 0x84}, {0x2013, 0x85}, {0x0192, 0x86}, {0x2044, 0x87},
    {0x2039, 0x88}, {0x203a, 0x89}, {0x2212, 0x8a}, {0x2030, 0x8b},
    {0x201e, 0x8c}, {0x201c, 0x8d}, {0x201d, 0x8e}, {0x2018, 0x8f},
    {0x2019, 0x90}, {0x201a, 0x91}, {0x2122, 0x92}, {0xfb01, 0x93},
    {0xfb02, 0x94}, {0x0141, 0x95}, {0x0152, 0x96}, {0x0160, 0x97},
    {0x0178, 0x98}, {0x017d, 0x99}, {0x0131, 0x9a}, {0x0142, 0x9b},
    {0x0153, 0x9c}, {0x0161, 0x9d}, {0x017e, 0x9e}, {0xfffd, 0x9f},
    {0x20ac, 0xa0},
};
295 | | |
296 | | template <typename T> |
297 | | static std::string |
298 | | int_to_string_base_internal(T num, int base, int length) |
299 | 52.9k | { |
300 | | // Backward compatibility -- int_to_string, which calls this function, used to use sprintf with |
301 | | // %0*d, so we interpret length such that a negative value appends spaces and a positive value |
302 | | // prepends zeroes. |
303 | 52.9k | if (!((base == 8) || (base == 10) || (base == 16))) { |
304 | 0 | throw std::logic_error("int_to_string_base called with unsupported base"); |
305 | 0 | } |
306 | 52.9k | std::string cvt; |
307 | 52.9k | if (base == 10) { |
308 | | // Use the more efficient std::to_string when possible |
309 | 5.04k | cvt = std::to_string(num); |
310 | 47.9k | } else { |
311 | 47.9k | std::ostringstream buf; |
312 | 47.9k | buf.imbue(std::locale::classic()); |
313 | 47.9k | buf << std::setbase(base) << std::nouppercase << num; |
314 | 47.9k | cvt = buf.str(); |
315 | 47.9k | } |
316 | 52.9k | std::string result; |
317 | 52.9k | int str_length = QIntC::to_int(cvt.length()); |
318 | 52.9k | if ((length > 0) && (str_length < length)) { |
319 | 37.5k | result.append(QIntC::to_size(length - str_length), '0'); |
320 | 37.5k | } |
321 | 52.9k | result += cvt; |
322 | 52.9k | if ((length < 0) && (str_length < -length)) { |
323 | 0 | result.append(QIntC::to_size(-length - str_length), ' '); |
324 | 0 | } |
325 | 52.9k | return result; |
326 | 52.9k | } QUtil.cc:std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > int_to_string_base_internal<long long>(long long, int, int) Line | Count | Source | 299 | 52.9k | { | 300 | | // Backward compatibility -- int_to_string, which calls this function, used to use sprintf with | 301 | | // %0*d, so we interpret length such that a negative value appends spaces and a positive value | 302 | | // prepends zeroes. | 303 | 52.9k | if (!((base == 8) || (base == 10) || (base == 16))) { | 304 | 0 | throw std::logic_error("int_to_string_base called with unsupported base"); | 305 | 0 | } | 306 | 52.9k | std::string cvt; | 307 | 52.9k | if (base == 10) { | 308 | | // Use the more efficient std::to_string when possible | 309 | 5.04k | cvt = std::to_string(num); | 310 | 47.9k | } else { | 311 | 47.9k | std::ostringstream buf; | 312 | 47.9k | buf.imbue(std::locale::classic()); | 313 | 47.9k | buf << std::setbase(base) << std::nouppercase << num; | 314 | 47.9k | cvt = buf.str(); | 315 | 47.9k | } | 316 | 52.9k | std::string result; | 317 | 52.9k | int str_length = QIntC::to_int(cvt.length()); | 318 | 52.9k | if ((length > 0) && (str_length < length)) { | 319 | 37.5k | result.append(QIntC::to_size(length - str_length), '0'); | 320 | 37.5k | } | 321 | 52.9k | result += cvt; | 322 | 52.9k | if ((length < 0) && (str_length < -length)) { | 323 | 0 | result.append(QIntC::to_size(-length - str_length), ' '); | 324 | 0 | } | 325 | 52.9k | return result; | 326 | 52.9k | } |
Unexecuted instantiation: QUtil.cc:std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > int_to_string_base_internal<unsigned long long>(unsigned long long, int, int) |
327 | | |
328 | | std::string |
329 | | QUtil::int_to_string(long long num, int length) |
330 | 5.04k | { |
331 | 5.04k | return int_to_string_base(num, 10, length); |
332 | 5.04k | } |
333 | | |
334 | | std::string |
335 | | QUtil::uint_to_string(unsigned long long num, int length) |
336 | 0 | { |
337 | 0 | return uint_to_string_base(num, 10, length); |
338 | 0 | } |
339 | | |
340 | | std::string |
341 | | QUtil::int_to_string_base(long long num, int base, int length) |
342 | 52.9k | { |
343 | 52.9k | return int_to_string_base_internal(num, base, length); |
344 | 52.9k | } |
345 | | |
346 | | std::string |
347 | | QUtil::uint_to_string_base(unsigned long long num, int base, int length) |
348 | 0 | { |
349 | 0 | return int_to_string_base_internal(num, base, length); |
350 | 0 | } |
351 | | |
352 | | std::string |
353 | | QUtil::double_to_string(double num, int decimal_places, bool trim_trailing_zeroes) |
354 | 24.9k | { |
355 | | // Backward compatibility -- this code used to use sprintf and treated decimal_places <= 0 to |
356 | | // mean to use the default, which was six decimal places. Starting in 10.2, we trim trailing |
357 | | // zeroes by default. |
358 | 24.9k | if (decimal_places <= 0) { |
359 | 24.9k | decimal_places = 6; |
360 | 24.9k | } |
361 | 24.9k | std::ostringstream buf; |
362 | 24.9k | buf.imbue(std::locale::classic()); |
363 | 24.9k | buf << std::setprecision(decimal_places) << std::fixed << num; |
364 | 24.9k | std::string result = buf.str(); |
365 | 24.9k | if (trim_trailing_zeroes) { |
366 | 174k | while ((result.length() > 1) && (result.back() == '0')) { |
367 | 149k | result.pop_back(); |
368 | 149k | } |
369 | 24.9k | if ((result.length() > 1) && (result.back() == '.')) { |
370 | 24.9k | result.pop_back(); |
371 | 24.9k | } |
372 | 24.9k | } |
373 | 24.9k | return result; |
374 | 24.9k | } |
375 | | |
376 | | long long |
377 | | QUtil::string_to_ll(char const* str) |
378 | 3.57M | { |
379 | 3.57M | errno = 0; |
380 | | #ifdef _MSC_VER |
381 | | long long result = _strtoi64(str, 0, 10); |
382 | | #else |
383 | 3.57M | long long result = strtoll(str, nullptr, 10); |
384 | 3.57M | #endif |
385 | 3.57M | if (errno == ERANGE) { |
386 | 227 | throw std::range_error( |
387 | 227 | std::string("overflow/underflow converting ") + str + " to 64-bit integer"); |
388 | 227 | } |
389 | 3.57M | return result; |
390 | 3.57M | } |
391 | | |
392 | | int |
393 | | QUtil::string_to_int(char const* str) |
394 | 972k | { |
395 | | // QIntC::to_int does range checking |
396 | 972k | return QIntC::to_int(string_to_ll(str)); |
397 | 972k | } |
398 | | |
399 | | unsigned long long |
400 | | QUtil::string_to_ull(char const* str) |
401 | 0 | { |
402 | 0 | char const* p = str; |
403 | 0 | while (*p && util::is_space(*p)) { |
404 | 0 | ++p; |
405 | 0 | } |
406 | 0 | if (*p == '-') { |
407 | 0 | throw std::runtime_error( |
408 | 0 | std::string("underflow converting ") + str + " to 64-bit unsigned integer"); |
409 | 0 | } |
410 | | |
411 | 0 | errno = 0; |
412 | | #ifdef _MSC_VER |
413 | | unsigned long long result = _strtoui64(str, 0, 10); |
414 | | #else |
415 | 0 | unsigned long long result = strtoull(str, nullptr, 10); |
416 | 0 | #endif |
417 | 0 | if (errno == ERANGE) { |
418 | 0 | throw std::runtime_error( |
419 | 0 | std::string("overflow converting ") + str + " to 64-bit unsigned integer"); |
420 | 0 | } |
421 | 0 | return result; |
422 | 0 | } |
423 | | |
424 | | unsigned int |
425 | | QUtil::string_to_uint(char const* str) |
426 | 0 | { |
427 | | // QIntC::to_uint does range checking |
428 | 0 | return QIntC::to_uint(string_to_ull(str)); |
429 | 0 | } |
430 | | |
431 | | bool |
432 | | QUtil::is_long_long(char const* str) |
433 | 0 | { |
434 | 0 | try { |
435 | 0 | auto i1 = string_to_ll(str); |
436 | 0 | std::string s1 = int_to_string(i1); |
437 | 0 | return str == s1; |
438 | 0 | } catch (std::exception&) { |
439 | | // overflow or other error |
440 | 0 | } |
441 | 0 | return false; |
442 | 0 | } |
443 | | |
444 | | unsigned char* |
445 | | QUtil::unsigned_char_pointer(std::string const& str) |
446 | 140k | { |
447 | 140k | return reinterpret_cast<unsigned char*>(const_cast<char*>(str.c_str())); |
448 | 140k | } |
449 | | |
450 | | unsigned char* |
451 | | QUtil::unsigned_char_pointer(char const* str) |
452 | 1.80k | { |
453 | 1.80k | return reinterpret_cast<unsigned char*>(const_cast<char*>(str)); |
454 | 1.80k | } |
455 | | |
456 | | void |
457 | | QUtil::throw_system_error(std::string const& description) |
458 | 0 | { |
459 | 0 | throw QPDFSystemError(description, errno); |
460 | 0 | } |
461 | | |
462 | | int |
463 | | QUtil::os_wrapper(std::string const& description, int status) |
464 | 0 | { |
465 | 0 | if (status == -1) { |
466 | 0 | throw_system_error(description); |
467 | 0 | } |
468 | 0 | return status; |
469 | 0 | } |
470 | | |
#ifdef _WIN32
// Convert the utf-8 encoded filename argument to a null-terminated wchar_t array. The name is
// first converted with QUtil::utf8_to_utf16; each pair of bytes after the leading two-byte UTF16
// marker is then combined, first byte as the high half, into one wchar_t.
static std::shared_ptr<wchar_t>
win_convert_filename(char const* filename)
{
    std::string const u16 = QUtil::utf8_to_utf16(filename);
    size_t const nbytes = u16.length();
    // One wchar_t per byte pair, not counting the marker.
    size_t const nchars = (nbytes / 2) - 1;
    auto holder = QUtil::make_shared_array<wchar_t>(nchars + 1);
    wchar_t* out = holder.get();
    out[nchars] = 0;
    // Start at 2 to skip the marker.
    for (unsigned int i = 2; i < nbytes; i += 2) {
        auto const high = static_cast<unsigned char>(u16.at(i));
        auto const low = static_cast<unsigned char>(u16.at(i + 1));
        out[(i / 2) - 1] = static_cast<wchar_t>((high << 8) + low);
    }
    return holder;
}
#endif
492 | | |
493 | | FILE* |
494 | | QUtil::safe_fopen(char const* filename, char const* mode) |
495 | 0 | { |
496 | 0 | FILE* f = nullptr; |
497 | | #ifdef _WIN32 |
498 | | std::shared_ptr<wchar_t> wfilenamep = win_convert_filename(filename); |
499 | | wchar_t* wfilename = wfilenamep.get(); |
500 | | auto wmodep = QUtil::make_shared_array<wchar_t>(strlen(mode) + 1); |
501 | | wchar_t* wmode = wmodep.get(); |
502 | | wmode[strlen(mode)] = 0; |
503 | | for (size_t i = 0; i < strlen(mode); ++i) { |
504 | | wmode[i] = static_cast<wchar_t>(mode[i]); |
505 | | } |
506 | | |
507 | | # ifdef _MSC_VER |
508 | | errno_t err = _wfopen_s(&f, wfilename, wmode); |
509 | | if (err != 0) { |
510 | | errno = err; |
511 | | } |
512 | | # else |
513 | | f = _wfopen(wfilename, wmode); |
514 | | # endif |
515 | | if (f == 0) { |
516 | | throw_system_error(std::string("open ") + filename); |
517 | | } |
518 | | #else |
519 | 0 | f = fopen_wrapper(std::string("open ") + filename, fopen(filename, mode)); |
520 | 0 | #endif |
521 | 0 | return f; |
522 | 0 | } |
523 | | |
524 | | FILE* |
525 | | QUtil::fopen_wrapper(std::string const& description, FILE* f) |
526 | 0 | { |
527 | 0 | if (f == nullptr) { |
528 | 0 | throw_system_error(description); |
529 | 0 | } |
530 | 0 | return f; |
531 | 0 | } |
532 | | |
533 | | bool |
534 | | QUtil::file_can_be_opened(char const* filename) |
535 | 0 | { |
536 | 0 | try { |
537 | 0 | fclose(safe_fopen(filename, "rb")); |
538 | 0 | return true; |
539 | 0 | } catch (std::runtime_error&) { |
540 | | // can't open the file |
541 | 0 | } |
542 | 0 | return false; |
543 | 0 | } |
544 | | |
545 | | int |
546 | | QUtil::seek(FILE* stream, qpdf_offset_t offset, int whence) |
547 | 0 | { |
548 | 0 | #if HAVE_FSEEKO |
549 | 0 | return fseeko(stream, QIntC::IntConverter<qpdf_offset_t, off_t>::convert(offset), whence); |
550 | | #elif HAVE_FSEEKO64 |
551 | | return fseeko64(stream, offset, whence); |
552 | | #else |
553 | | # if defined _MSC_VER || defined __BORLANDC__ |
554 | | return _fseeki64(stream, offset, whence); |
555 | | # else |
556 | | return fseek(stream, QIntC::to_long(offset), whence); |
557 | | # endif |
558 | | #endif |
559 | 0 | } |
560 | | |
561 | | qpdf_offset_t |
562 | | QUtil::tell(FILE* stream) |
563 | 0 | { |
564 | 0 | #if HAVE_FSEEKO |
565 | 0 | return QIntC::to_offset(ftello(stream)); |
566 | | #elif HAVE_FSEEKO64 |
567 | | return QIntC::to_offset(ftello64(stream)); |
568 | | #else |
569 | | # if defined _MSC_VER || defined __BORLANDC__ |
570 | | return _ftelli64(stream); |
571 | | # else |
572 | | return QIntC::to_offset(ftell(stream)); |
573 | | # endif |
574 | | #endif |
575 | 0 | } |
576 | | |
577 | | bool |
578 | | QUtil::same_file(char const* name1, char const* name2) |
579 | 0 | { |
580 | 0 | if ((name1 == nullptr) || (strlen(name1) == 0) || (name2 == nullptr) || (strlen(name2) == 0)) { |
581 | 0 | return false; |
582 | 0 | } |
583 | | #ifdef _WIN32 |
584 | | bool same = false; |
585 | | # ifndef AVOID_WINDOWS_HANDLE |
586 | | HANDLE fh1 = CreateFile( |
587 | | name1, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); |
588 | | HANDLE fh2 = CreateFile( |
589 | | name2, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); |
590 | | BY_HANDLE_FILE_INFORMATION fi1; |
591 | | BY_HANDLE_FILE_INFORMATION fi2; |
592 | | if ((fh1 != INVALID_HANDLE_VALUE) && (fh2 != INVALID_HANDLE_VALUE) && |
593 | | GetFileInformationByHandle(fh1, &fi1) && GetFileInformationByHandle(fh2, &fi2) && |
594 | | (fi1.dwVolumeSerialNumber == fi2.dwVolumeSerialNumber) && |
595 | | (fi1.nFileIndexLow == fi2.nFileIndexLow) && (fi1.nFileIndexHigh == fi2.nFileIndexHigh)) { |
596 | | same = true; |
597 | | } |
598 | | if (fh1 != INVALID_HANDLE_VALUE) { |
599 | | CloseHandle(fh1); |
600 | | } |
601 | | if (fh2 != INVALID_HANDLE_VALUE) { |
602 | | CloseHandle(fh2); |
603 | | } |
604 | | # endif |
605 | | return same; |
606 | | #else |
607 | 0 | struct stat st1; |
608 | 0 | struct stat st2; |
609 | 0 | if ((stat(name1, &st1) == 0) && (stat(name2, &st2) == 0) && (st1.st_ino == st2.st_ino) && |
610 | 0 | (st1.st_dev == st2.st_dev)) { |
611 | 0 | return true; |
612 | 0 | } |
613 | 0 | #endif |
614 | 0 | return false; |
615 | 0 | } |
616 | | |
617 | | void |
618 | | QUtil::remove_file(char const* path) |
619 | 0 | { |
620 | | #ifdef _WIN32 |
621 | | std::shared_ptr<wchar_t> wpath = win_convert_filename(path); |
622 | | os_wrapper(std::string("remove ") + path, _wunlink(wpath.get())); |
623 | | #else |
624 | 0 | os_wrapper(std::string("remove ") + path, unlink(path)); |
625 | 0 | #endif |
626 | 0 | } |
627 | | |
628 | | void |
629 | | QUtil::rename_file(char const* oldname, char const* newname) |
630 | 0 | { |
631 | | #ifdef _WIN32 |
632 | | try { |
633 | | remove_file(newname); |
634 | | } catch (QPDFSystemError&) { |
635 | | // ignore |
636 | | } |
637 | | std::shared_ptr<wchar_t> wold = win_convert_filename(oldname); |
638 | | std::shared_ptr<wchar_t> wnew = win_convert_filename(newname); |
639 | | os_wrapper(std::string("rename ") + oldname + " " + newname, _wrename(wold.get(), wnew.get())); |
640 | | #else |
641 | 0 | os_wrapper(std::string("rename ") + oldname + " " + newname, rename(oldname, newname)); |
642 | 0 | #endif |
643 | 0 | } |
644 | | |
645 | | void |
646 | | QUtil::pipe_file(char const* filename, Pipeline* p) |
647 | 0 | { |
648 | | // Exercised in test suite by testing file_provider. |
649 | 0 | FILE* f = safe_fopen(filename, "rb"); |
650 | 0 | FileCloser fc(f); |
651 | 0 | size_t len = 0; |
652 | 0 | int constexpr size = 8192; |
653 | 0 | unsigned char buf[size]; |
654 | 0 | while ((len = fread(buf, 1, size, f)) > 0) { |
655 | 0 | p->write(buf, len); |
656 | 0 | } |
657 | 0 | p->finish(); |
658 | 0 | if (ferror(f)) { |
659 | 0 | throw std::runtime_error(std::string("failure reading file ") + filename); |
660 | 0 | } |
661 | 0 | } |
662 | | |
663 | | std::function<void(Pipeline*)> |
664 | | QUtil::file_provider(std::string const& filename) |
665 | 0 | { |
666 | 0 | return [filename](Pipeline* p) { pipe_file(filename.c_str(), p); }; |
667 | 0 | } |
668 | | |
669 | | std::string |
670 | | QUtil::path_basename(std::string const& filename) |
671 | 0 | { |
672 | | #ifdef _WIN32 |
673 | | char const* pathsep = "/\\"; |
674 | | #else |
675 | 0 | char const* pathsep = "/"; |
676 | 0 | #endif |
677 | 0 | std::string last = filename; |
678 | 0 | auto len = last.length(); |
679 | 0 | while (len > 1) { |
680 | 0 | auto pos = last.find_last_of(pathsep); |
681 | 0 | if (pos == len - 1) { |
682 | 0 | last.pop_back(); |
683 | 0 | --len; |
684 | 0 | } else if (pos == std::string::npos) { |
685 | 0 | break; |
686 | 0 | } else { |
687 | 0 | last = last.substr(pos + 1); |
688 | 0 | break; |
689 | 0 | } |
690 | 0 | } |
691 | 0 | return last; |
692 | 0 | } |
693 | | |
694 | | char* |
695 | | QUtil::copy_string(std::string const& str) |
696 | 0 | { |
697 | 0 | char* result = new char[str.length() + 1]; |
698 | | // Use memcpy in case string contains nulls |
699 | 0 | result[str.length()] = '\0'; |
700 | 0 | memcpy(result, str.c_str(), str.length()); |
701 | 0 | return result; |
702 | 0 | } |
703 | | |
704 | | std::shared_ptr<char> |
705 | | QUtil::make_shared_cstr(std::string const& str) |
706 | 0 | { |
707 | 0 | auto result = QUtil::make_shared_array<char>(str.length() + 1); |
708 | | // Use memcpy in case string contains nulls |
709 | 0 | result.get()[str.length()] = '\0'; |
710 | 0 | memcpy(result.get(), str.c_str(), str.length()); |
711 | 0 | return result; |
712 | 0 | } |
713 | | |
714 | | std::unique_ptr<char[]> |
715 | | QUtil::make_unique_cstr(std::string const& str) |
716 | 1.80k | { |
717 | 1.80k | auto result = std::make_unique<char[]>(str.length() + 1); |
718 | | // Use memcpy in case string contains nulls |
719 | 1.80k | result.get()[str.length()] = '\0'; |
720 | 1.80k | memcpy(result.get(), str.c_str(), str.length()); |
721 | 1.80k | return result; |
722 | 1.80k | } |
723 | | |
724 | | std::string |
725 | | QUtil::hex_encode(std::string const& input) |
726 | 5.58k | { |
727 | 5.58k | static auto constexpr hexchars = "0123456789abcdef"; |
728 | 5.58k | std::string result; |
729 | 5.58k | result.reserve(2 * input.length()); |
730 | 89.4k | for (const char c: input) { |
731 | 89.4k | result += hexchars[static_cast<unsigned char>(c) >> 4]; |
732 | 89.4k | result += hexchars[c & 0x0f]; |
733 | 89.4k | } |
734 | 5.58k | return result; |
735 | 5.58k | } |
736 | | |
737 | | std::string |
738 | | QUtil::hex_decode(std::string const& input) |
739 | 0 | { |
740 | 0 | std::string result; |
741 | | // We know result.size() <= 0.5 * input.size() + 1. However, reserving string space for this |
742 | | // upper bound has a negative impact. |
743 | 0 | bool first = true; |
744 | 0 | char decoded; |
745 | 0 | for (auto ch: input) { |
746 | 0 | ch = util::hex_decode_char(ch); |
747 | 0 | if (ch < '\20') { |
748 | 0 | if (first) { |
749 | 0 | decoded = static_cast<char>(ch << 4); |
750 | 0 | first = false; |
751 | 0 | } else { |
752 | 0 | result.push_back(decoded | ch); |
753 | 0 | first = true; |
754 | 0 | } |
755 | 0 | } |
756 | 0 | } |
757 | 0 | if (!first) { |
758 | 0 | result.push_back(decoded); |
759 | 0 | } |
760 | 0 | return result; |
761 | 0 | } |
762 | | |
763 | | void |
764 | | QUtil::binary_stdout() |
765 | 0 | { |
766 | | #if defined(_WIN32) && defined(__BORLANDC__) |
767 | | setmode(_fileno(stdout), _O_BINARY); |
768 | | #elif defined(_WIN32) |
769 | | _setmode(_fileno(stdout), _O_BINARY); |
770 | | #endif |
771 | 0 | } |
772 | | |
773 | | void |
774 | | QUtil::binary_stdin() |
775 | 0 | { |
776 | | #if defined(_WIN32) && defined(__BORLANDC__) |
777 | | setmode(_fileno(stdin), _O_BINARY); |
778 | | #elif defined(_WIN32) |
779 | | _setmode(_fileno(stdin), _O_BINARY); |
780 | | #endif |
781 | 0 | } |
782 | | |
783 | | void |
784 | | QUtil::setLineBuf(FILE* f) |
785 | 0 | { |
786 | 0 | #ifndef _WIN32 |
787 | 0 | setvbuf(f, reinterpret_cast<char*>(0), _IOLBF, 0); |
788 | 0 | #endif |
789 | 0 | } |
790 | | |
791 | | char* |
792 | | QUtil::getWhoami(char* argv0) |
793 | 0 | { |
794 | 0 | char* whoami = nullptr; |
795 | 0 | if (((whoami = strrchr(argv0, '/')) == nullptr) && |
796 | 0 | ((whoami = strrchr(argv0, '\\')) == nullptr)) { |
797 | 0 | whoami = argv0; |
798 | 0 | } else { |
799 | 0 | ++whoami; |
800 | 0 | } |
801 | |
|
802 | 0 | if ((strlen(whoami) > 4) && (strcmp(whoami + strlen(whoami) - 4, ".exe") == 0)) { |
803 | 0 | whoami[strlen(whoami) - 4] = '\0'; |
804 | 0 | } |
805 | |
|
806 | 0 | return whoami; |
807 | 0 | } |
808 | | |
809 | | bool |
810 | | QUtil::get_env(std::string const& var, std::string* value) |
811 | 1 | { |
812 | | // This was basically ripped out of wxWindows. |
813 | | #ifdef _WIN32 |
814 | | # ifdef NO_GET_ENVIRONMENT |
815 | | return false; |
816 | | # else |
817 | | // first get the size of the buffer |
818 | | DWORD len = ::GetEnvironmentVariable(var.c_str(), NULL, 0); |
819 | | if (len == 0) { |
820 | | // this means that there is no such variable |
821 | | return false; |
822 | | } |
823 | | |
824 | | if (value) { |
825 | | auto t = QUtil::make_shared_array<char>(len + 1); |
826 | | ::GetEnvironmentVariable(var.c_str(), t.get(), len); |
827 | | *value = t.get(); |
828 | | } |
829 | | |
830 | | return true; |
831 | | # endif |
832 | | #else |
833 | 1 | char* p = getenv(var.c_str()); |
834 | 1 | if (p == nullptr) { |
835 | 1 | return false; |
836 | 1 | } |
837 | 0 | if (value) { |
838 | 0 | *value = p; |
839 | 0 | } |
840 | |
|
841 | 0 | return true; |
842 | 1 | #endif |
843 | 1 | } |
844 | | |
845 | | time_t |
846 | | QUtil::get_current_time() |
847 | 0 | { |
848 | | #ifdef _WIN32 |
849 | | // The procedure to get local time at this resolution comes from |
850 | | // the Microsoft documentation. It says to convert a SYSTEMTIME |
851 | | // to a FILETIME, and to copy the FILETIME to a ULARGE_INTEGER. |
852 | | // The resulting number is the number of 100-nanosecond intervals |
853 | | // between January 1, 1601 and now. POSIX threads wants a time |
854 | | // based on January 1, 1970, so we adjust by subtracting the |
855 | | // number of seconds in that time period from the result we get |
856 | | // here. |
857 | | SYSTEMTIME sysnow; |
858 | | GetSystemTime(&sysnow); |
859 | | FILETIME filenow; |
860 | | SystemTimeToFileTime(&sysnow, &filenow); |
861 | | ULARGE_INTEGER uinow; |
862 | | uinow.LowPart = filenow.dwLowDateTime; |
863 | | uinow.HighPart = filenow.dwHighDateTime; |
864 | | ULONGLONG now = uinow.QuadPart; |
865 | | return static_cast<time_t>((now / 10000000ULL) - 11644473600ULL); |
866 | | #else |
867 | 0 | return time(nullptr); |
868 | 0 | #endif |
869 | 0 | } |
870 | | |
871 | | QUtil::QPDFTime |
872 | | QUtil::get_current_qpdf_time() |
873 | 0 | { |
874 | | #ifdef _WIN32 |
875 | | SYSTEMTIME ltime; |
876 | | GetLocalTime(<ime); |
877 | | TIME_ZONE_INFORMATION tzinfo; |
878 | | GetTimeZoneInformation(&tzinfo); |
879 | | return QPDFTime( |
880 | | static_cast<int>(ltime.wYear), |
881 | | static_cast<int>(ltime.wMonth), |
882 | | static_cast<int>(ltime.wDay), |
883 | | static_cast<int>(ltime.wHour), |
884 | | static_cast<int>(ltime.wMinute), |
885 | | static_cast<int>(ltime.wSecond), |
886 | | // tzinfo.Bias is minutes before UTC |
887 | | static_cast<int>(tzinfo.Bias)); |
888 | | #else |
889 | 0 | struct tm ltime; |
890 | 0 | time_t now = time(nullptr); |
891 | 0 | tzset(); |
892 | 0 | # ifdef HAVE_LOCALTIME_R |
893 | 0 | localtime_r(&now, <ime); |
894 | | # else |
895 | | ltime = *localtime(&now); |
896 | | # endif |
897 | | # if HAVE_TM_GMTOFF |
898 | | // tm_gmtoff is seconds after UTC |
899 | | int tzoff = -static_cast<int>(ltime.tm_gmtoff / 60); |
900 | | # elif HAVE_EXTERN_LONG_TIMEZONE |
901 | | // timezone is seconds before UTC, not adjusted for daylight saving time |
902 | 0 | int tzoff = static_cast<int>(timezone / 60); |
903 | | # else |
904 | | // Don't know how to get timezone on this platform |
905 | | int tzoff = 0; |
906 | | # endif |
907 | 0 | return { |
908 | 0 | static_cast<int>(ltime.tm_year + 1900), |
909 | 0 | static_cast<int>(ltime.tm_mon + 1), |
910 | 0 | static_cast<int>(ltime.tm_mday), |
911 | 0 | static_cast<int>(ltime.tm_hour), |
912 | 0 | static_cast<int>(ltime.tm_min), |
913 | 0 | static_cast<int>(ltime.tm_sec), |
914 | 0 | tzoff}; |
915 | 0 | #endif |
916 | 0 | } |
917 | | |
918 | | std::string |
919 | | QUtil::qpdf_time_to_pdf_time(QPDFTime const& qtm) |
920 | 0 | { |
921 | 0 | std::string tz_offset; |
922 | 0 | int t = qtm.tz_delta; |
923 | 0 | if (t == 0) { |
924 | 0 | tz_offset = "Z"; |
925 | 0 | } else { |
926 | 0 | if (t < 0) { |
927 | 0 | t = -t; |
928 | 0 | tz_offset += "+"; |
929 | 0 | } else { |
930 | 0 | tz_offset += "-"; |
931 | 0 | } |
932 | 0 | tz_offset += QUtil::int_to_string(t / 60, 2) + "'" + QUtil::int_to_string(t % 60, 2) + "'"; |
933 | 0 | } |
934 | 0 | return ( |
935 | 0 | "D:" + QUtil::int_to_string(qtm.year, 4) + QUtil::int_to_string(qtm.month, 2) + |
936 | 0 | QUtil::int_to_string(qtm.day, 2) + QUtil::int_to_string(qtm.hour, 2) + |
937 | 0 | QUtil::int_to_string(qtm.minute, 2) + QUtil::int_to_string(qtm.second, 2) + tz_offset); |
938 | 0 | } |
939 | | |
940 | | std::string |
941 | | QUtil::qpdf_time_to_iso8601(QPDFTime const& qtm) |
942 | 0 | { |
943 | 0 | std::string tz_offset; |
944 | 0 | int t = qtm.tz_delta; |
945 | 0 | if (t == 0) { |
946 | 0 | tz_offset = "Z"; |
947 | 0 | } else { |
948 | 0 | if (t < 0) { |
949 | 0 | t = -t; |
950 | 0 | tz_offset += "+"; |
951 | 0 | } else { |
952 | 0 | tz_offset += "-"; |
953 | 0 | } |
954 | 0 | tz_offset += QUtil::int_to_string(t / 60, 2) + ":" + QUtil::int_to_string(t % 60, 2); |
955 | 0 | } |
956 | 0 | return ( |
957 | 0 | QUtil::int_to_string(qtm.year, 4) + "-" + QUtil::int_to_string(qtm.month, 2) + "-" + |
958 | 0 | QUtil::int_to_string(qtm.day, 2) + "T" + QUtil::int_to_string(qtm.hour, 2) + ":" + |
959 | 0 | QUtil::int_to_string(qtm.minute, 2) + ":" + QUtil::int_to_string(qtm.second, 2) + |
960 | 0 | tz_offset); |
961 | 0 | } |
962 | | |
963 | | bool |
964 | | QUtil::pdf_time_to_qpdf_time(std::string const& str, QPDFTime* qtm) |
965 | 0 | { |
966 | 0 | static std::regex pdf_date( |
967 | 0 | "^D:([0-9]{4})([0-9]{2})([0-9]{2})" |
968 | 0 | "([0-9]{2})([0-9]{2})([0-9]{2})" |
969 | 0 | "(?:(Z?)|([\\+\\-])([0-9]{2})'([0-9]{2})')$"); |
970 | 0 | std::smatch m; |
971 | 0 | if (!std::regex_match(str, m, pdf_date)) { |
972 | 0 | return false; |
973 | 0 | } |
974 | 0 | int tz_delta = 0; |
975 | 0 | auto to_i = [](std::string const& s) { return QUtil::string_to_int(s.c_str()); }; |
976 | |
|
977 | 0 | if (m[8] != "") { |
978 | 0 | tz_delta = ((to_i(m[9]) * 60) + to_i(m[10])); |
979 | 0 | if (m[8] == "+") { |
980 | 0 | tz_delta = -tz_delta; |
981 | 0 | } |
982 | 0 | } |
983 | 0 | if (qtm) { |
984 | 0 | *qtm = QPDFTime( |
985 | 0 | to_i(m[1]), to_i(m[2]), to_i(m[3]), to_i(m[4]), to_i(m[5]), to_i(m[6]), tz_delta); |
986 | 0 | } |
987 | 0 | return true; |
988 | 0 | } |
989 | | |
990 | | bool |
991 | | QUtil::pdf_time_to_iso8601(std::string const& pdf_time, std::string& iso8601) |
992 | 0 | { |
993 | 0 | QPDFTime qtm; |
994 | 0 | if (pdf_time_to_qpdf_time(pdf_time, &qtm)) { |
995 | 0 | iso8601 = qpdf_time_to_iso8601(qtm); |
996 | 0 | return true; |
997 | 0 | } |
998 | 0 | return false; |
999 | 0 | } |
1000 | | |
1001 | | std::string |
1002 | | QUtil::toUTF8(unsigned long uval) |
1003 | 0 | { |
1004 | 0 | std::string result; |
1005 | | |
1006 | | // A UTF-8 encoding of a Unicode value is a single byte for Unicode values <= 127. For larger |
1007 | | // values, the first byte of the UTF-8 encoding has '1' as each of its n highest bits and '0' |
1008 | | // for its (n+1)th highest bit where n is the total number of bytes required. Subsequent bytes |
1009 | | // start with '10' and have the remaining 6 bits free for encoding. For example, an 11-bit |
1010 | | // Unicode value can be stored in two bytes where the first is 110zzzzz, the second is 10zzzzzz, |
1011 | | // and the z's represent the remaining bits. |
1012 | |
|
1013 | 0 | if (uval > 0x7fffffff) { |
1014 | 0 | throw std::runtime_error("bounds error in QUtil::toUTF8"); |
1015 | 0 | } else if (uval < 128) { |
1016 | 0 | result += static_cast<char>(uval); |
1017 | 0 | } else { |
1018 | 0 | unsigned char bytes[7]; |
1019 | 0 | bytes[6] = '\0'; |
1020 | 0 | unsigned char* cur_byte = &bytes[5]; |
1021 | | |
1022 | | // maximum value that will fit in the current number of bytes |
1023 | 0 | unsigned char maxval = 0x3f; // six bits |
1024 | |
|
1025 | 0 | while (uval > QIntC::to_ulong(maxval)) { |
1026 | | // Assign low six bits plus 10000000 to lowest unused byte position, then shift |
1027 | 0 | *cur_byte = static_cast<unsigned char>(0x80 + (uval & 0x3f)); |
1028 | 0 | uval >>= 6; |
1029 | | // Maximum that will fit in high byte now shrinks by one bit |
1030 | 0 | maxval = static_cast<unsigned char>(maxval >> 1); |
1031 | | // Slide to the left one byte |
1032 | 0 | if (cur_byte <= bytes) { |
1033 | 0 | throw std::logic_error("QUtil::toUTF8: overflow error"); |
1034 | 0 | } |
1035 | 0 | --cur_byte; |
1036 | 0 | } |
1037 | | // If maxval is k bits long, the high (7 - k) bits of the resulting byte must be high. |
1038 | 0 | *cur_byte = static_cast<unsigned char>(QIntC::to_ulong(0xff - (1 + (maxval << 1))) + uval); |
1039 | |
|
1040 | 0 | result += reinterpret_cast<char*>(cur_byte); |
1041 | 0 | } |
1042 | | |
1043 | 0 | return result; |
1044 | 0 | } |
1045 | | |
1046 | | std::string |
1047 | | QUtil::toUTF16(unsigned long uval) |
1048 | 0 | { |
1049 | 0 | std::string result; |
1050 | 0 | if ((uval >= 0xd800) && (uval <= 0xdfff)) { |
1051 | 0 | result = "\xff\xfd"; |
1052 | 0 | } else if (uval <= 0xffff) { |
1053 | 0 | char out[2]; |
1054 | 0 | out[0] = static_cast<char>((uval & 0xff00) >> 8); |
1055 | 0 | out[1] = static_cast<char>(uval & 0xff); |
1056 | 0 | result = std::string(out, 2); |
1057 | 0 | } else if (uval <= 0x10ffff) { |
1058 | 0 | char out[4]; |
1059 | 0 | uval -= 0x10000; |
1060 | 0 | unsigned short high = static_cast<unsigned short>(((uval & 0xffc00) >> 10) + 0xd800); |
1061 | 0 | unsigned short low = static_cast<unsigned short>((uval & 0x3ff) + 0xdc00); |
1062 | 0 | out[0] = static_cast<char>((high & 0xff00) >> 8); |
1063 | 0 | out[1] = static_cast<char>(high & 0xff); |
1064 | 0 | out[2] = static_cast<char>((low & 0xff00) >> 8); |
1065 | 0 | out[3] = static_cast<char>(low & 0xff); |
1066 | 0 | result = std::string(out, 4); |
1067 | 0 | } else { |
1068 | 0 | result = "\xff\xfd"; |
1069 | 0 | } |
1070 | |
|
1071 | 0 | return result; |
1072 | 0 | } |
1073 | | |
1074 | | // Random data support |
1075 | | |
1076 | | namespace |
1077 | | { |
1078 | | class RandomDataProviderProvider |
1079 | | { |
1080 | | public: |
1081 | | RandomDataProviderProvider(); |
1082 | | void setProvider(RandomDataProvider*); |
1083 | | RandomDataProvider* getProvider(); |
1084 | | |
1085 | | private: |
1086 | | RandomDataProvider* default_provider; |
1087 | | RandomDataProvider* current_provider{nullptr}; |
1088 | | }; |
1089 | | } // namespace |
1090 | | |
1091 | | RandomDataProviderProvider::RandomDataProviderProvider() : |
1092 | 1 | default_provider(CryptoRandomDataProvider::getInstance()) |
1093 | 1 | { |
1094 | 1 | this->current_provider = default_provider; |
1095 | 1 | } |
1096 | | |
1097 | | RandomDataProvider* |
1098 | | RandomDataProviderProvider::getProvider() |
1099 | 159 | { |
1100 | 159 | return this->current_provider; |
1101 | 159 | } |
1102 | | |
1103 | | void |
1104 | | RandomDataProviderProvider::setProvider(RandomDataProvider* p) |
1105 | 0 | { |
1106 | 0 | this->current_provider = p ? p : this->default_provider; |
1107 | 0 | } |
1108 | | |
1109 | | static RandomDataProviderProvider* |
1110 | | getRandomDataProviderProvider() |
1111 | 159 | { |
1112 | | // Thread-safe static initializer |
1113 | 159 | static RandomDataProviderProvider rdpp; |
1114 | 159 | return &rdpp; |
1115 | 159 | } |
1116 | | |
1117 | | void |
1118 | | QUtil::setRandomDataProvider(RandomDataProvider* p) |
1119 | 0 | { |
1120 | 0 | getRandomDataProviderProvider()->setProvider(p); |
1121 | 0 | } |
1122 | | |
1123 | | RandomDataProvider* |
1124 | | QUtil::getRandomDataProvider() |
1125 | 159 | { |
1126 | 159 | return getRandomDataProviderProvider()->getProvider(); |
1127 | 159 | } |
1128 | | |
1129 | | void |
1130 | | QUtil::initializeWithRandomBytes(unsigned char* data, size_t len) |
1131 | 159 | { |
1132 | 159 | getRandomDataProvider()->provideRandomData(data, len); |
1133 | 159 | } |
1134 | | |
1135 | | std::string |
1136 | | util::random_string(size_t len) |
1137 | 0 | { |
1138 | 0 | std::string result(len, '\0'); |
1139 | 0 | QUtil::initializeWithRandomBytes(reinterpret_cast<unsigned char*>(result.data()), len); |
1140 | 0 | return result; |
1141 | 0 | } |
1142 | | |
1143 | | long |
1144 | | QUtil::random() |
1145 | 0 | { |
1146 | 0 | long result = 0L; |
1147 | 0 | initializeWithRandomBytes(reinterpret_cast<unsigned char*>(&result), sizeof(result)); |
1148 | 0 | return result; |
1149 | 0 | } |
1150 | | |
1151 | | void |
1152 | | QUtil::read_file_into_memory(char const* filename, std::shared_ptr<char>& file_buf, size_t& size) |
1153 | 0 | { |
1154 | 0 | FILE* f = safe_fopen(filename, "rb"); |
1155 | 0 | FileCloser fc(f); |
1156 | 0 | fseek(f, 0, SEEK_END); |
1157 | 0 | size = QIntC::to_size(QUtil::tell(f)); |
1158 | 0 | fseek(f, 0, SEEK_SET); |
1159 | 0 | file_buf = QUtil::make_shared_array<char>(size); |
1160 | 0 | char* buf_p = file_buf.get(); |
1161 | 0 | size_t bytes_read = 0; |
1162 | 0 | size_t len = 0; |
1163 | 0 | while ((len = fread(buf_p + bytes_read, 1, size - bytes_read, f)) > 0) { |
1164 | 0 | bytes_read += len; |
1165 | 0 | } |
1166 | 0 | if (bytes_read != size) { |
1167 | 0 | if (ferror(f)) { |
1168 | 0 | throw std::runtime_error( |
1169 | 0 | std::string("failure reading file ") + filename + " into memory: read " + |
1170 | 0 | uint_to_string(bytes_read) + "; wanted " + uint_to_string(size)); |
1171 | 0 | } else { |
1172 | 0 | throw std::runtime_error( |
1173 | 0 | std::string("premature eof reading file ") + filename + " into memory: read " + |
1174 | 0 | uint_to_string(bytes_read) + "; wanted " + uint_to_string(size)); |
1175 | 0 | } |
1176 | 0 | } |
1177 | 0 | } |
1178 | | |
1179 | | std::string |
1180 | | QUtil::read_file_into_string(char const* filename) |
1181 | 0 | { |
1182 | 0 | FILE* f = safe_fopen(filename, "rb"); |
1183 | 0 | FileCloser fc(f); |
1184 | 0 | return read_file_into_string(f, filename); |
1185 | 0 | } |
1186 | | |
1187 | | std::string |
1188 | | QUtil::read_file_into_string(FILE* f, std::string_view filename) |
1189 | 0 | { |
1190 | 0 | fseek(f, 0, SEEK_END); |
1191 | 0 | auto o_size = QUtil::tell(f); |
1192 | 0 | if (o_size >= 0) { |
1193 | | // Seekable file |
1194 | 0 | auto size = QIntC::to_size(o_size); |
1195 | 0 | fseek(f, 0, SEEK_SET); |
1196 | 0 | std::string result(size, '\0'); |
1197 | 0 | if (auto n_read = fread(result.data(), 1, size, f); n_read != size) { |
1198 | 0 | if (ferror(f)) { |
1199 | 0 | throw std::runtime_error( |
1200 | 0 | std::string("failure reading file ") + std::string(filename) + |
1201 | 0 | " into memory: read " + uint_to_string(n_read) + "; wanted " + |
1202 | 0 | uint_to_string(size)); |
1203 | 0 | } else { |
1204 | 0 | throw std::runtime_error( |
1205 | 0 | std::string("premature eof reading file ") + std::string(filename) + |
1206 | 0 | " into memory: read " + uint_to_string(n_read) + "; wanted " + |
1207 | 0 | uint_to_string(size)); |
1208 | 0 | } |
1209 | 0 | } |
1210 | 0 | return result; |
1211 | 0 | } else { |
1212 | | // Pipe or other non-seekable file |
1213 | 0 | size_t buf_size = 8192; |
1214 | 0 | auto n_read = buf_size; |
1215 | 0 | std::string buffer(buf_size, '\0'); |
1216 | 0 | std::string result; |
1217 | 0 | while (n_read == buf_size) { |
1218 | 0 | n_read = fread(buffer.data(), 1, buf_size, f); |
1219 | 0 | buffer.erase(n_read); |
1220 | 0 | result.append(buffer); |
1221 | 0 | } |
1222 | 0 | if (ferror(f)) { |
1223 | 0 | throw std::runtime_error( |
1224 | 0 | std::string("failure reading file ") + std::string(filename) + " into memory"); |
1225 | 0 | } |
1226 | 0 | return result; |
1227 | 0 | } |
1228 | 0 | } |
1229 | | |
// Read one character from f into ch. Returns true if a character was read and false if nothing
// was read without an error being flagged on the stream. Throws std::runtime_error if the read
// produced nothing and ferror(f) is set.
static bool
read_char_from_FILE(char& ch, FILE* f)
{
    if (fread(&ch, 1, 1, f) != 0) {
        return true;
    }
    if (ferror(f)) {
        throw std::runtime_error("failure reading character from file");
    }
    return false;
}
1242 | | |
1243 | | std::list<std::string> |
1244 | | QUtil::read_lines_from_file(char const* filename, bool preserve_eol) |
1245 | 0 | { |
1246 | 0 | std::list<std::string> lines; |
1247 | 0 | FILE* f = safe_fopen(filename, "rb"); |
1248 | 0 | FileCloser fc(f); |
1249 | 0 | auto next_char = [&f](char& ch) { return read_char_from_FILE(ch, f); }; |
1250 | 0 | read_lines_from_file(next_char, lines, preserve_eol); |
1251 | 0 | return lines; |
1252 | 0 | } |
1253 | | |
1254 | | std::list<std::string> |
1255 | | QUtil::read_lines_from_file(std::istream& in, bool preserve_eol) |
1256 | 0 | { |
1257 | 0 | std::list<std::string> lines; |
1258 | 0 | auto next_char = [&in](char& ch) { return (in.get(ch)) ? true : false; }; |
1259 | 0 | read_lines_from_file(next_char, lines, preserve_eol); |
1260 | 0 | return lines; |
1261 | 0 | } |
1262 | | |
1263 | | std::list<std::string> |
1264 | | QUtil::read_lines_from_file(FILE* f, bool preserve_eol) |
1265 | 0 | { |
1266 | 0 | std::list<std::string> lines; |
1267 | 0 | auto next_char = [&f](char& ch) { return read_char_from_FILE(ch, f); }; |
1268 | 0 | read_lines_from_file(next_char, lines, preserve_eol); |
1269 | 0 | return lines; |
1270 | 0 | } |
1271 | | |
1272 | | void |
1273 | | QUtil::read_lines_from_file( |
1274 | | std::function<bool(char&)> next_char, std::list<std::string>& lines, bool preserve_eol) |
1275 | 0 | { |
1276 | 0 | std::string* buf = nullptr; |
1277 | 0 | char c; |
1278 | 0 | while (next_char(c)) { |
1279 | 0 | if (buf == nullptr) { |
1280 | 0 | lines.emplace_back(""); |
1281 | 0 | buf = &(lines.back()); |
1282 | 0 | buf->reserve(80); |
1283 | 0 | } |
1284 | |
|
1285 | 0 | if (buf->capacity() == buf->size()) { |
1286 | 0 | buf->reserve(buf->capacity() * 2); |
1287 | 0 | } |
1288 | 0 | if (c == '\n') { |
1289 | 0 | if (preserve_eol) { |
1290 | 0 | buf->append(1, c); |
1291 | 0 | } else { |
1292 | | // Remove any carriage return that preceded the newline and discard the newline |
1293 | 0 | if ((!buf->empty()) && ((*(buf->rbegin())) == '\r')) { |
1294 | 0 | buf->erase(buf->length() - 1); |
1295 | 0 | } |
1296 | 0 | } |
1297 | 0 | buf = nullptr; |
1298 | 0 | } else { |
1299 | 0 | buf->append(1, c); |
1300 | 0 | } |
1301 | 0 | } |
1302 | 0 | } |
1303 | | |
1304 | | int |
1305 | | QUtil::str_compare_nocase(char const* s1, char const* s2) |
1306 | 0 | { |
1307 | | #if defined(_WIN32) && defined(__BORLANDC__) |
1308 | | return stricmp(s1, s2); |
1309 | | #elif defined(_WIN32) |
1310 | | return _stricmp(s1, s2); |
1311 | | #else |
1312 | 0 | return strcasecmp(s1, s2); |
1313 | 0 | #endif |
1314 | 0 | } |
1315 | | |
1316 | | std::vector<int> |
1317 | | QUtil::parse_numrange(char const* range, int max) |
1318 | 0 | { |
1319 | | // Performance note: this implementation aims to be straightforward, not efficient. Numeric |
1320 | | // range parsing is used only during argument processing. It is not used during processing of |
1321 | | // PDF files. |
1322 | |
|
1323 | 0 | static std::regex group_re(R"((x)?(z|r?\d+)(?:-(z|r?\d+))?)"); |
1324 | 0 | auto parse_num = [&max](std::string const& s) -> int { |
1325 | 0 | if (s == "z") { |
1326 | 0 | return max; |
1327 | 0 | } |
1328 | 0 | int num; |
1329 | 0 | if (s.at(0) == 'r') { |
1330 | 0 | num = max + 1 - string_to_int(s.substr(1).c_str()); |
1331 | 0 | } else { |
1332 | 0 | num = string_to_int(s.c_str()); |
1333 | 0 | } |
1334 | | // max == 0 means we don't know the max and are just testing for valid syntax. |
1335 | 0 | if ((max > 0) && ((num < 1) || (num > max))) { |
1336 | 0 | throw std::runtime_error("number " + std::to_string(num) + " out of range"); |
1337 | 0 | } |
1338 | 0 | return num; |
1339 | 0 | }; |
1340 | |
|
1341 | 0 | auto populate = [](std::vector<int>& group, int first_num, bool is_span, int last_num) { |
1342 | 0 | group.clear(); |
1343 | 0 | group.emplace_back(first_num); |
1344 | 0 | if (is_span) { |
1345 | 0 | if (first_num > last_num) { |
1346 | 0 | for (auto i = first_num - 1; i >= last_num; --i) { |
1347 | 0 | group.push_back(i); |
1348 | 0 | } |
1349 | 0 | } else { |
1350 | 0 | for (auto i = first_num + 1; i <= last_num; ++i) { |
1351 | 0 | group.push_back(i); |
1352 | 0 | } |
1353 | 0 | } |
1354 | 0 | } |
1355 | 0 | }; |
1356 | |
|
1357 | 0 | char const* p; |
1358 | 0 | try { |
1359 | 0 | char const* range_end = range + strlen(range); |
1360 | 0 | std::vector<int> result; |
1361 | 0 | std::vector<int> last_group; |
1362 | | // See if range ends with :even or :odd. |
1363 | 0 | size_t start_idx = 0; |
1364 | 0 | size_t skip = 1; |
1365 | 0 | p = std::find(range, range_end, ':'); |
1366 | 0 | if (*p == ':') { |
1367 | 0 | if (strcmp(p, ":odd") == 0) { |
1368 | 0 | skip = 2; |
1369 | 0 | } else if (strcmp(p, ":even") == 0) { |
1370 | 0 | skip = 2; |
1371 | 0 | start_idx = 1; |
1372 | 0 | } else { |
1373 | 0 | throw std::runtime_error("expected :even or :odd"); |
1374 | 0 | } |
1375 | 0 | range_end = p; |
1376 | 0 | } |
1377 | | |
1378 | | // Divide the range into groups |
1379 | 0 | p = range; |
1380 | 0 | char const* group_end; |
1381 | 0 | bool first = true; |
1382 | 0 | while (p != range_end) { |
1383 | 0 | group_end = std::find(p, range_end, ','); |
1384 | 0 | std::cmatch m; |
1385 | 0 | if (!std::regex_match(p, group_end, m, group_re)) { |
1386 | 0 | throw std::runtime_error("invalid range syntax"); |
1387 | 0 | } |
1388 | 0 | auto is_exclude = m[1].matched; |
1389 | 0 | if (first && is_exclude) { |
1390 | 0 | throw std::runtime_error("first range group may not be an exclusion"); |
1391 | 0 | } |
1392 | 0 | first = false; |
1393 | 0 | auto first_num = parse_num(m[2].str()); |
1394 | 0 | auto is_span = m[3].matched; |
1395 | 0 | int last_num{0}; |
1396 | 0 | if (is_span) { |
1397 | 0 | last_num = parse_num(m[3].str()); |
1398 | 0 | } |
1399 | 0 | if (is_exclude) { |
1400 | 0 | std::vector<int> work; |
1401 | 0 | populate(work, first_num, is_span, last_num); |
1402 | 0 | std::set<int> exclusions; |
1403 | 0 | exclusions.insert(work.begin(), work.end()); |
1404 | 0 | work = last_group; |
1405 | 0 | last_group.clear(); |
1406 | 0 | for (auto n: work) { |
1407 | 0 | if (!exclusions.contains(n)) { |
1408 | 0 | last_group.emplace_back(n); |
1409 | 0 | } |
1410 | 0 | } |
1411 | 0 | } else { |
1412 | 0 | result.insert(result.end(), last_group.begin(), last_group.end()); |
1413 | 0 | populate(last_group, first_num, is_span, last_num); |
1414 | 0 | } |
1415 | 0 | p = group_end; |
1416 | 0 | if (*p == ',') { |
1417 | 0 | ++p; |
1418 | 0 | if (p == range_end) { |
1419 | 0 | throw std::runtime_error("trailing comma"); |
1420 | 0 | } |
1421 | 0 | } |
1422 | 0 | } |
1423 | 0 | result.insert(result.end(), last_group.begin(), last_group.end()); |
1424 | 0 | if (skip == 1) { |
1425 | 0 | return result; |
1426 | 0 | } |
1427 | 0 | std::vector<int> filtered; |
1428 | 0 | for (auto i = start_idx; i < result.size(); i += skip) { |
1429 | 0 | filtered.emplace_back(result.at(i)); |
1430 | 0 | } |
1431 | 0 | return filtered; |
1432 | 0 | } catch (std::runtime_error const& e) { |
1433 | 0 | std::string message; |
1434 | 0 | if (p) { |
1435 | 0 | message = "error at * in numeric range " + |
1436 | 0 | std::string(range, QIntC::to_size(p - range)) + "*" + p + ": " + e.what(); |
1437 | 0 | } else { |
1438 | 0 | message = "error in numeric range " + std::string(range) + ": " + e.what(); |
1439 | 0 | } |
1440 | 0 | throw std::runtime_error(message); |
1441 | 0 | } |
1442 | 0 | } |
1443 | | |
// Target encodings understood by transcode_utf8.
enum encoding_e {
    e_utf16,
    e_ascii,
    e_winansi,
    e_macroman,
    e_pdfdoc,
};
1445 | | |
1446 | | static unsigned char |
1447 | | encode_winansi(unsigned long codepoint) |
1448 | 0 | { |
1449 | 0 | auto i = unicode_to_win_ansi.find(codepoint); |
1450 | 0 | if (i != unicode_to_win_ansi.end()) { |
1451 | 0 | return i->second; |
1452 | 0 | } |
1453 | 0 | return '\0'; |
1454 | 0 | } |
1455 | | |
1456 | | static unsigned char |
1457 | | encode_macroman(unsigned long codepoint) |
1458 | 0 | { |
1459 | 0 | auto i = unicode_to_mac_roman.find(codepoint); |
1460 | 0 | if (i != unicode_to_mac_roman.end()) { |
1461 | 0 | return i->second; |
1462 | 0 | } |
1463 | 0 | return '\0'; |
1464 | 0 | } |
1465 | | |
1466 | | static unsigned char |
1467 | | encode_pdfdoc(unsigned long codepoint) |
1468 | 0 | { |
1469 | 0 | auto i = unicode_to_pdf_doc.find(codepoint); |
1470 | 0 | if (i != unicode_to_pdf_doc.end()) { |
1471 | 0 | return i->second; |
1472 | 0 | } |
1473 | 0 | return '\0'; |
1474 | 0 | } |
1475 | | |
1476 | | unsigned long |
1477 | | QUtil::get_next_utf8_codepoint(std::string const& utf8_val, size_t& pos, bool& error) |
1478 | 0 | { |
1479 | 0 | auto o_pos = pos; |
1480 | 0 | size_t len = utf8_val.length(); |
1481 | 0 | unsigned char ch = static_cast<unsigned char>(utf8_val.at(pos++)); |
1482 | 0 | error = false; |
1483 | 0 | if (ch < 128) { |
1484 | 0 | return static_cast<unsigned long>(ch); |
1485 | 0 | } |
1486 | | |
1487 | 0 | size_t bytes_needed = 0; |
1488 | 0 | unsigned bit_check = 0x40; |
1489 | 0 | unsigned char to_clear = 0x80; |
1490 | 0 | while (ch & bit_check) { |
1491 | 0 | ++bytes_needed; |
1492 | 0 | to_clear = static_cast<unsigned char>(to_clear | bit_check); |
1493 | 0 | bit_check >>= 1; |
1494 | 0 | } |
1495 | 0 | if (((bytes_needed > 5) || (bytes_needed < 1)) || ((pos + bytes_needed) > len)) { |
1496 | 0 | error = true; |
1497 | 0 | return 0xfffd; |
1498 | 0 | } |
1499 | | |
1500 | 0 | auto codepoint = static_cast<unsigned long>(ch & ~to_clear); |
1501 | 0 | while (bytes_needed > 0) { |
1502 | 0 | --bytes_needed; |
1503 | 0 | ch = static_cast<unsigned char>(utf8_val.at(pos++)); |
1504 | 0 | if ((ch & 0xc0) != 0x80) { |
1505 | 0 | --pos; |
1506 | 0 | error = true; |
1507 | 0 | return 0xfffd; |
1508 | 0 | } |
1509 | 0 | codepoint <<= 6; |
1510 | 0 | codepoint += (ch & 0x3f); |
1511 | 0 | } |
1512 | 0 | unsigned long lower_bound = 0; |
1513 | 0 | switch (pos - o_pos) { |
1514 | 0 | case 2: |
1515 | 0 | lower_bound = 1 << 7; |
1516 | 0 | break; |
1517 | 0 | case 3: |
1518 | 0 | lower_bound = 1 << 11; |
1519 | 0 | break; |
1520 | 0 | case 4: |
1521 | 0 | lower_bound = 1 << 16; |
1522 | 0 | break; |
1523 | 0 | case 5: |
1524 | 0 | lower_bound = 1 << 12; |
1525 | 0 | break; |
1526 | 0 | case 6: |
1527 | 0 | lower_bound = 1 << 26; |
1528 | 0 | break; |
1529 | 0 | default: |
1530 | 0 | lower_bound = 0; |
1531 | 0 | } |
1532 | | |
1533 | 0 | if (lower_bound > 0 && codepoint < lower_bound) { |
1534 | | // Too many bytes were used, but return whatever character was encoded. |
1535 | 0 | error = true; |
1536 | 0 | } |
1537 | 0 | return codepoint; |
1538 | 0 | } |
1539 | | |
1540 | | static bool |
1541 | | transcode_utf8(std::string const& utf8_val, std::string& result, encoding_e encoding, char unknown) |
1542 | 0 | { |
1543 | 0 | bool okay = true; |
1544 | 0 | result.clear(); |
1545 | 0 | size_t len = utf8_val.length(); |
1546 | 0 | switch (encoding) { |
1547 | 0 | case e_utf16: |
1548 | 0 | result += "\xfe\xff"; |
1549 | 0 | break; |
1550 | 0 | case e_pdfdoc: |
1551 | | // We need to avoid having the result start with something that will be interpreted as |
1552 | | // UTF-16 or UTF-8, meaning we can't end up with a string that starts with "fe ff", |
1553 | | // (UTF-16-BE) "ff fe" (UTF-16-LE, not officially part of the PDF spec, but recognized by |
1554 | | // most readers including qpdf), or "ef bb bf" (UTF-8). It's more efficient to check the |
1555 | | // input string to see if it will map to one of those sequences than to check the output |
1556 | | // string since all cases start with the same starting character. |
1557 | 0 | if ((len >= 4) && (utf8_val[0] == '\xc3')) { |
1558 | 0 | static std::string fe_ff("\xbe\xc3\xbf"); |
1559 | 0 | static std::string ff_fe("\xbf\xc3\xbe"); |
1560 | 0 | static std::string ef_bb_bf("\xaf\xc2\xbb\xc2\xbf"); |
1561 | | // C++-20 has starts_with, but when this was written, qpdf had a minimum supported |
1562 | | // version of C++-17. |
1563 | 0 | if ((utf8_val.compare(1, 3, fe_ff) == 0) || (utf8_val.compare(1, 3, ff_fe) == 0) || |
1564 | 0 | (utf8_val.compare(1, 5, ef_bb_bf) == 0)) { |
1565 | 0 | result += unknown; |
1566 | 0 | okay = false; |
1567 | 0 | } |
1568 | 0 | } |
1569 | 0 | break; |
1570 | 0 | default: |
1571 | 0 | break; |
1572 | 0 | } |
1573 | 0 | size_t pos = 0; |
1574 | 0 | while (pos < len) { |
1575 | 0 | bool error = false; |
1576 | 0 | unsigned long codepoint = QUtil::get_next_utf8_codepoint(utf8_val, pos, error); |
1577 | 0 | if (error) { |
1578 | 0 | okay = false; |
1579 | 0 | if (encoding == e_utf16) { |
1580 | 0 | result += "\xff\xfd"; |
1581 | 0 | } else { |
1582 | 0 | result.append(1, unknown); |
1583 | 0 | } |
1584 | 0 | } else if (codepoint < 128) { |
1585 | 0 | char ch = static_cast<char>(codepoint); |
1586 | 0 | if (encoding == e_utf16) { |
1587 | 0 | result += QUtil::toUTF16(QIntC::to_ulong(ch)); |
1588 | 0 | } else if ((encoding == e_pdfdoc) && (((ch >= 0x18) && (ch <= 0x1f)) || (ch == 127))) { |
1589 | | // PDFDocEncoding maps some low characters to Unicode, so if we encounter those |
1590 | | // invalid UTF-8 code points, map them to unknown so reversing the mapping doesn't |
1591 | | // change them into other characters. |
1592 | 0 | okay = false; |
1593 | 0 | result.append(1, unknown); |
1594 | 0 | } else { |
1595 | 0 | result.append(1, ch); |
1596 | 0 | } |
1597 | 0 | } else if (encoding == e_utf16) { |
1598 | 0 | result += QUtil::toUTF16(codepoint); |
1599 | 0 | } else if ((codepoint == 0xad) && (encoding == e_pdfdoc)) { |
1600 | | // PDFDocEncoding omits 0x00ad (soft hyphen). |
1601 | 0 | okay = false; |
1602 | 0 | result.append(1, unknown); |
1603 | 0 | } else if ( |
1604 | 0 | (codepoint > 160) && (codepoint < 256) && |
1605 | 0 | ((encoding == e_winansi) || (encoding == e_pdfdoc))) { |
1606 | 0 | result.append(1, static_cast<char>(codepoint & 0xff)); |
1607 | 0 | } else { |
1608 | 0 | unsigned char ch = '\0'; |
1609 | 0 | if (encoding == e_winansi) { |
1610 | 0 | ch = encode_winansi(codepoint); |
1611 | 0 | } else if (encoding == e_macroman) { |
1612 | 0 | ch = encode_macroman(codepoint); |
1613 | 0 | } else if (encoding == e_pdfdoc) { |
1614 | 0 | ch = encode_pdfdoc(codepoint); |
1615 | 0 | } |
1616 | 0 | if (ch == '\0') { |
1617 | 0 | okay = false; |
1618 | 0 | ch = static_cast<unsigned char>(unknown); |
1619 | 0 | } |
1620 | 0 | result.append(1, static_cast<char>(ch)); |
1621 | 0 | } |
1622 | 0 | } |
1623 | 0 | return okay; |
1624 | 0 | } |
1625 | | |
1626 | | static std::string |
1627 | | transcode_utf8(std::string const& utf8_val, encoding_e encoding, char unknown) |
1628 | 0 | { |
1629 | 0 | std::string result; |
1630 | 0 | transcode_utf8(utf8_val, result, encoding, unknown); |
1631 | 0 | return result; |
1632 | 0 | } |
1633 | | |
1634 | | std::string |
1635 | | QUtil::utf8_to_utf16(std::string const& utf8) |
1636 | 0 | { |
1637 | 0 | return transcode_utf8(utf8, e_utf16, 0); |
1638 | 0 | } |
1639 | | |
1640 | | std::string |
1641 | | QUtil::utf8_to_ascii(std::string const& utf8, char unknown_char) |
1642 | 0 | { |
1643 | 0 | return transcode_utf8(utf8, e_ascii, unknown_char); |
1644 | 0 | } |
1645 | | |
1646 | | std::string |
1647 | | QUtil::utf8_to_win_ansi(std::string const& utf8, char unknown_char) |
1648 | 0 | { |
1649 | 0 | return transcode_utf8(utf8, e_winansi, unknown_char); |
1650 | 0 | } |
1651 | | |
1652 | | std::string |
1653 | | QUtil::utf8_to_mac_roman(std::string const& utf8, char unknown_char) |
1654 | 0 | { |
1655 | 0 | return transcode_utf8(utf8, e_macroman, unknown_char); |
1656 | 0 | } |
1657 | | |
1658 | | std::string |
1659 | | QUtil::utf8_to_pdf_doc(std::string const& utf8, char unknown_char) |
1660 | 0 | { |
1661 | 0 | return transcode_utf8(utf8, e_pdfdoc, unknown_char); |
1662 | 0 | } |
1663 | | |
1664 | | bool |
1665 | | QUtil::utf8_to_ascii(std::string const& utf8, std::string& ascii, char unknown_char) |
1666 | 0 | { |
1667 | 0 | return transcode_utf8(utf8, ascii, e_ascii, unknown_char); |
1668 | 0 | } |
1669 | | |
1670 | | bool |
1671 | | QUtil::utf8_to_win_ansi(std::string const& utf8, std::string& win, char unknown_char) |
1672 | 0 | { |
1673 | 0 | return transcode_utf8(utf8, win, e_winansi, unknown_char); |
1674 | 0 | } |
1675 | | |
1676 | | bool |
1677 | | QUtil::utf8_to_mac_roman(std::string const& utf8, std::string& mac, char unknown_char) |
1678 | 0 | { |
1679 | 0 | return transcode_utf8(utf8, mac, e_macroman, unknown_char); |
1680 | 0 | } |
1681 | | |
1682 | | bool |
1683 | | QUtil::utf8_to_pdf_doc(std::string const& utf8, std::string& pdfdoc, char unknown_char) |
1684 | 0 | { |
1685 | 0 | return transcode_utf8(utf8, pdfdoc, e_pdfdoc, unknown_char); |
1686 | 0 | } |
1687 | | |
1688 | | bool |
1689 | | QUtil::is_utf16(std::string const& val) |
1690 | 0 | { |
1691 | 0 | return ( |
1692 | 0 | (val.length() >= 2) && |
1693 | 0 | (((val.at(0) == '\xfe') && (val.at(1) == '\xff')) || |
1694 | 0 | ((val.at(0) == '\xff') && (val.at(1) == '\xfe')))); |
1695 | 0 | } |
1696 | | |
1697 | | bool |
1698 | | QUtil::is_explicit_utf8(std::string const& val) |
1699 | 0 | { |
1700 | | // QPDF_String.cc knows that this is a 3-byte sequence. |
1701 | 0 | return ( |
1702 | 0 | (val.length() >= 3) && (val.at(0) == '\xef') && (val.at(1) == '\xbb') && |
1703 | 0 | (val.at(2) == '\xbf')); |
1704 | 0 | } |
1705 | | |
1706 | | std::string |
1707 | | QUtil::utf16_to_utf8(std::string const& val) |
1708 | 0 | { |
1709 | 0 | std::string result; |
1710 | | // This code uses unsigned long and unsigned short to hold codepoint values. It requires |
1711 | | // unsigned long to be at least 32 bits and unsigned short to be at least 16 bits, but it will |
1712 | | // work fine if they are larger. |
1713 | 0 | unsigned long codepoint = 0L; |
1714 | 0 | size_t len = val.length(); |
1715 | 0 | size_t start = 0; |
1716 | 0 | bool is_le = false; |
1717 | 0 | if (is_utf16(val)) { |
1718 | 0 | if (static_cast<unsigned char>(val.at(0)) == 0xff) { |
1719 | 0 | is_le = true; |
1720 | 0 | } |
1721 | 0 | start += 2; |
1722 | 0 | } |
1723 | | // If the string has an odd number of bytes, the last byte is ignored. |
1724 | 0 | for (size_t i = start; i + 1 < len; i += 2) { |
1725 | | // Convert from UTF16-BE. If we get a malformed codepoint, this code will generate |
1726 | | // incorrect output without giving a warning. Specifically, a high codepoint not followed |
1727 | | // by a low codepoint will be discarded, and a low codepoint not preceded by a high |
1728 | | // codepoint will just get its low 10 bits output. |
1729 | 0 | auto msb = is_le ? i + 1 : i; |
1730 | 0 | auto lsb = is_le ? i : i + 1; |
1731 | 0 | unsigned short bits = QIntC::to_ushort( |
1732 | 0 | (static_cast<unsigned char>(val.at(msb)) << 8) + |
1733 | 0 | static_cast<unsigned char>(val.at(lsb))); |
1734 | 0 | if ((bits & 0xFC00) == 0xD800) { |
1735 | 0 | codepoint = 0x10000U + ((bits & 0x3FFU) << 10U); |
1736 | 0 | continue; |
1737 | 0 | } else if ((bits & 0xFC00) == 0xDC00) { |
1738 | 0 | if (codepoint != 0) { |
1739 | 0 | QTC::TC("qpdf", "QUtil non-trivial UTF-16"); |
1740 | 0 | } |
1741 | 0 | codepoint += bits & 0x3FF; |
1742 | 0 | } else { |
1743 | 0 | codepoint = bits; |
1744 | 0 | } |
1745 | | |
1746 | 0 | result += QUtil::toUTF8(codepoint); |
1747 | 0 | codepoint = 0; |
1748 | 0 | } |
1749 | 0 | return result; |
1750 | 0 | } |
1751 | | |
1752 | | std::string |
1753 | | QUtil::win_ansi_to_utf8(std::string const& val) |
1754 | 0 | { |
1755 | 0 | std::string result; |
1756 | 0 | size_t len = val.length(); |
1757 | 0 | for (unsigned int i = 0; i < len; ++i) { |
1758 | 0 | unsigned char ch = static_cast<unsigned char>(val.at(i)); |
1759 | 0 | unsigned short ch_short = ch; |
1760 | 0 | if ((ch >= 128) && (ch <= 160)) { |
1761 | 0 | ch_short = win_ansi_to_unicode[ch - 128]; |
1762 | 0 | } |
1763 | 0 | result += QUtil::toUTF8(ch_short); |
1764 | 0 | } |
1765 | 0 | return result; |
1766 | 0 | } |
1767 | | |
1768 | | std::string |
1769 | | QUtil::mac_roman_to_utf8(std::string const& val) |
1770 | 0 | { |
1771 | 0 | std::string result; |
1772 | 0 | size_t len = val.length(); |
1773 | 0 | for (unsigned int i = 0; i < len; ++i) { |
1774 | 0 | unsigned char ch = static_cast<unsigned char>(val.at(i)); |
1775 | 0 | unsigned short ch_short = ch; |
1776 | 0 | if (ch >= 128) { |
1777 | 0 | ch_short = mac_roman_to_unicode[ch - 128]; |
1778 | 0 | } |
1779 | 0 | result += QUtil::toUTF8(ch_short); |
1780 | 0 | } |
1781 | 0 | return result; |
1782 | 0 | } |
1783 | | |
1784 | | std::string |
1785 | | QUtil::pdf_doc_to_utf8(std::string const& val) |
1786 | 0 | { |
1787 | 0 | std::string result; |
1788 | 0 | size_t len = val.length(); |
1789 | 0 | for (unsigned int i = 0; i < len; ++i) { |
1790 | 0 | unsigned char ch = static_cast<unsigned char>(val.at(i)); |
1791 | 0 | unsigned short ch_short = ch; |
1792 | 0 | if ((ch >= 127) && (ch <= 160)) { |
1793 | 0 | ch_short = pdf_doc_to_unicode[ch - 127]; |
1794 | 0 | } else if ((ch >= 24) && (ch <= 31)) { |
1795 | 0 | ch_short = pdf_doc_low_to_unicode[ch - 24]; |
1796 | 0 | } else if (ch == 173) { |
1797 | 0 | ch_short = 0xfffd; |
1798 | 0 | } |
1799 | 0 | result += QUtil::toUTF8(ch_short); |
1800 | 0 | } |
1801 | 0 | return result; |
1802 | 0 | } |
1803 | | |
1804 | | void |
1805 | | QUtil::analyze_encoding( |
1806 | | std::string const& val, bool& has_8bit_chars, bool& is_valid_utf8, bool& is_utf16) |
1807 | 0 | { |
1808 | 0 | has_8bit_chars = is_utf16 = is_valid_utf8 = false; |
1809 | 0 | if (QUtil::is_utf16(val)) { |
1810 | 0 | has_8bit_chars = true; |
1811 | 0 | is_utf16 = true; |
1812 | 0 | return; |
1813 | 0 | } |
1814 | 0 | size_t len = val.length(); |
1815 | 0 | size_t pos = 0; |
1816 | 0 | bool any_errors = false; |
1817 | 0 | while (pos < len) { |
1818 | 0 | bool error = false; |
1819 | 0 | auto o_pos = pos; |
1820 | 0 | get_next_utf8_codepoint(val, pos, error); |
1821 | 0 | if (error) { |
1822 | 0 | any_errors = true; |
1823 | 0 | } |
1824 | 0 | if (pos - o_pos > 1 || val[o_pos] & 0x80) { |
1825 | 0 | has_8bit_chars = true; |
1826 | 0 | } |
1827 | 0 | } |
1828 | 0 | if (has_8bit_chars && (!any_errors)) { |
1829 | 0 | is_valid_utf8 = true; |
1830 | 0 | } |
1831 | 0 | } |
1832 | | |
1833 | | std::vector<std::string> |
1834 | | QUtil::possible_repaired_encodings(std::string supplied) |
1835 | 0 | { |
1836 | 0 | std::vector<std::string> result; |
1837 | | // Always include the original string |
1838 | 0 | result.push_back(supplied); |
1839 | 0 | bool has_8bit_chars = false; |
1840 | 0 | bool is_valid_utf8 = false; |
1841 | 0 | bool is_utf16 = false; |
1842 | 0 | analyze_encoding(supplied, has_8bit_chars, is_valid_utf8, is_utf16); |
1843 | 0 | if (!has_8bit_chars) { |
1844 | 0 | return result; |
1845 | 0 | } |
1846 | 0 | if (is_utf16) { |
1847 | | // Convert to UTF-8 and pretend we got a UTF-8 string. |
1848 | 0 | is_utf16 = false; |
1849 | 0 | is_valid_utf8 = true; |
1850 | 0 | supplied = utf16_to_utf8(supplied); |
1851 | 0 | } |
1852 | 0 | std::string output; |
1853 | 0 | if (is_valid_utf8) { |
1854 | | // Maybe we were given UTF-8 but wanted one of the single-byte encodings. |
1855 | 0 | if (utf8_to_pdf_doc(supplied, output)) { |
1856 | 0 | result.push_back(output); |
1857 | 0 | } |
1858 | 0 | if (utf8_to_win_ansi(supplied, output)) { |
1859 | 0 | result.push_back(output); |
1860 | 0 | } |
1861 | 0 | if (utf8_to_mac_roman(supplied, output)) { |
1862 | 0 | result.push_back(output); |
1863 | 0 | } |
1864 | 0 | } else { |
1865 | | // Maybe we were given one of the single-byte encodings but wanted UTF-8. |
1866 | 0 | std::string from_pdf_doc(pdf_doc_to_utf8(supplied)); |
1867 | 0 | result.push_back(from_pdf_doc); |
1868 | 0 | std::string from_win_ansi(win_ansi_to_utf8(supplied)); |
1869 | 0 | result.push_back(from_win_ansi); |
1870 | 0 | std::string from_mac_roman(mac_roman_to_utf8(supplied)); |
1871 | 0 | result.push_back(from_mac_roman); |
1872 | | |
1873 | | // Maybe we were given one of the other single-byte encodings but wanted one of the other |
1874 | | // ones. |
1875 | 0 | if (utf8_to_win_ansi(from_pdf_doc, output)) { |
1876 | 0 | result.push_back(output); |
1877 | 0 | } |
1878 | 0 | if (utf8_to_mac_roman(from_pdf_doc, output)) { |
1879 | 0 | result.push_back(output); |
1880 | 0 | } |
1881 | 0 | if (utf8_to_pdf_doc(from_win_ansi, output)) { |
1882 | 0 | result.push_back(output); |
1883 | 0 | } |
1884 | 0 | if (utf8_to_mac_roman(from_win_ansi, output)) { |
1885 | 0 | result.push_back(output); |
1886 | 0 | } |
1887 | 0 | if (utf8_to_pdf_doc(from_mac_roman, output)) { |
1888 | 0 | result.push_back(output); |
1889 | 0 | } |
1890 | 0 | if (utf8_to_win_ansi(from_mac_roman, output)) { |
1891 | 0 | result.push_back(output); |
1892 | 0 | } |
1893 | 0 | } |
1894 | | // De-duplicate |
1895 | 0 | std::vector<std::string> t; |
1896 | 0 | std::set<std::string> seen; |
1897 | 0 | for (auto const& iter: result) { |
1898 | 0 | if (!seen.contains(iter)) { |
1899 | 0 | seen.insert(iter); |
1900 | 0 | t.push_back(iter); |
1901 | 0 | } |
1902 | 0 | } |
1903 | 0 | return t; |
1904 | 0 | } |
1905 | | |
1906 | | #ifndef QPDF_NO_WCHAR_T |
1907 | | static int |
1908 | | call_main_from_wmain( |
1909 | | bool, int argc, wchar_t const* const argv[], std::function<int(int, char*[])> realmain) |
1910 | 0 | { |
1911 | | // argv contains UTF-16-encoded strings with a 16-bit wchar_t. Convert this to UTF-8-encoded |
1912 | | // strings for compatibility with other systems. That way the rest of qpdf.cc can just act like |
1913 | | // arguments are UTF-8. |
1914 | |
|
1915 | 0 | std::vector<std::string> utf8_argv; |
1916 | 0 | utf8_argv.reserve(QIntC::to_size(argc)); |
1917 | 0 | for (int i = 0; i < argc; ++i) { |
1918 | 0 | std::string utf16; |
1919 | 0 | for (size_t j = 0; j < std::wcslen(argv[i]); ++j) { |
1920 | 0 | unsigned short codepoint = static_cast<unsigned short>(argv[i][j]); |
1921 | 0 | utf16.append(1, static_cast<char>(QIntC::to_uchar(codepoint >> 8))); |
1922 | 0 | utf16.append(1, static_cast<char>(QIntC::to_uchar(codepoint & 0xff))); |
1923 | 0 | } |
1924 | 0 | utf8_argv.emplace_back(QUtil::utf16_to_utf8(utf16)); |
1925 | 0 | } |
1926 | 0 | std::vector<char*> new_argv; |
1927 | 0 | new_argv.reserve(utf8_argv.size() + 1U); |
1928 | 0 | for (auto const& arg: utf8_argv) { |
1929 | 0 | new_argv.emplace_back(const_cast<char*>(arg.data())); |
1930 | 0 | } |
1931 | 0 | argc = QIntC::to_int(utf8_argv.size()); |
1932 | 0 | new_argv.emplace_back(nullptr); |
1933 | 0 | return realmain(argc, new_argv.data()); |
1934 | 0 | } |
1935 | | |
1936 | | int |
1937 | | QUtil::call_main_from_wmain(int argc, wchar_t* argv[], std::function<int(int, char*[])> realmain) |
1938 | 0 | { |
1939 | 0 | return ::call_main_from_wmain(true, argc, argv, realmain); |
1940 | 0 | } |
1941 | | |
1942 | | int |
1943 | | QUtil::call_main_from_wmain( |
1944 | | int argc, wchar_t const* const argv[], std::function<int(int, char const* const[])> realmain) |
1945 | 0 | { |
1946 | 0 | return ::call_main_from_wmain(true, argc, argv, [realmain](int new_argc, char* new_argv[]) { |
1947 | 0 | return realmain(new_argc, new_argv); |
1948 | 0 | }); |
1949 | 0 | } |
1950 | | |
1951 | | #endif // QPDF_NO_WCHAR_T |
1952 | | |
1953 | | size_t |
1954 | | QUtil::get_max_memory_usage() |
1955 | 0 | { |
1956 | 0 | #if defined(HAVE_MALLOC_INFO) && defined(HAVE_OPEN_MEMSTREAM) |
1957 | 0 | static std::regex tag_re("<(/?\\w+)([^>]*?)>"); |
1958 | 0 | static std::regex attr_re("(\\w+)=\"(.*?)\""); |
1959 | |
|
1960 | 0 | char* buf; |
1961 | 0 | size_t size; |
1962 | 0 | FILE* f = open_memstream(&buf, &size); |
1963 | 0 | if (f == nullptr) { |
1964 | 0 | return 0; |
1965 | 0 | } |
1966 | 0 | malloc_info(0, f); |
1967 | 0 | fclose(f); |
1968 | 0 | if (QUtil::get_env("QPDF_DEBUG_MEM_USAGE")) { |
1969 | 0 | fprintf(stderr, "%s", buf); |
1970 | 0 | } |
1971 | | |
1972 | | // Warning: this code uses regular expression to extract data from an XML string. This is |
1973 | | // generally a bad idea, but we're going to do it anyway because QUtil.hh warns against using |
1974 | | // this function for other than development/testing, and if this function fails to generate |
1975 | | // reasonable output during performance testing, it will be noticed. |
1976 | | |
1977 | | // This is my best guess at how to interpret malloc_info. Anyway it seems to provide useful |
1978 | | // information for detecting code changes that drastically change memory usage. |
1979 | 0 | size_t result = 0; |
1980 | 0 | try { |
1981 | 0 | std::cregex_iterator m_begin(buf, buf + size, tag_re); |
1982 | 0 | std::cregex_iterator cr_end; |
1983 | 0 | std::sregex_iterator sr_end; |
1984 | |
|
1985 | 0 | int in_heap = 0; |
1986 | 0 | for (auto m = m_begin; m != cr_end; ++m) { |
1987 | 0 | std::string tag(m->str(1)); |
1988 | 0 | if (tag == "heap") { |
1989 | 0 | ++in_heap; |
1990 | 0 | } else if (tag == "/heap") { |
1991 | 0 | --in_heap; |
1992 | 0 | } else if (in_heap == 0) { |
1993 | 0 | std::string rest = m->str(2); |
1994 | 0 | std::map<std::string, std::string> attrs; |
1995 | 0 | std::sregex_iterator a_begin(rest.begin(), rest.end(), attr_re); |
1996 | 0 | for (auto m2 = a_begin; m2 != sr_end; ++m2) { |
1997 | 0 | attrs[m2->str(1)] = m2->str(2); |
1998 | 0 | } |
1999 | 0 | if (tag == "total") { |
2000 | 0 | if (attrs.contains("size")) { |
2001 | 0 | result += QIntC::to_size(QUtil::string_to_ull(attrs["size"].c_str())); |
2002 | 0 | } |
2003 | 0 | } else if (tag == "system" && attrs["type"] == "max") { |
2004 | 0 | result += QIntC::to_size(QUtil::string_to_ull(attrs["size"].c_str())); |
2005 | 0 | } |
2006 | 0 | } |
2007 | 0 | } |
2008 | 0 | } catch (...) { |
2009 | | // ignore -- just return 0 |
2010 | 0 | } |
2011 | 0 | free(buf); |
2012 | 0 | return result; |
2013 | | #else |
2014 | | return 0; |
2015 | | #endif |
2016 | 0 | } |
2017 | | |
2018 | | char |
2019 | | QUtil::hex_decode_char(char digit) |
2020 | 0 | { |
2021 | 0 | return util::hex_decode_char(digit); |
2022 | 0 | } |
2023 | | |
2024 | | std::string |
2025 | | QUtil::hex_encode_char(char c) |
2026 | 0 | { |
2027 | 0 | return util::hex_encode_char(c); |
2028 | 0 | } |
2029 | | |
2030 | | bool |
2031 | | QUtil::is_number(char const* p) |
2032 | 0 | { |
2033 | | // No longer used by qpdf. |
2034 | | |
2035 | | // ^[\+\-]?(\.\d*|\d+(\.\d*)?)$ |
2036 | 0 | if (!*p) { |
2037 | 0 | return false; |
2038 | 0 | } |
2039 | 0 | if ((*p == '-') || (*p == '+')) { |
2040 | 0 | ++p; |
2041 | 0 | } |
2042 | 0 | bool found_dot = false; |
2043 | 0 | bool found_digit = false; |
2044 | 0 | for (; *p; ++p) { |
2045 | 0 | if (*p == '.') { |
2046 | 0 | if (found_dot) { |
2047 | | // only one dot |
2048 | 0 | return false; |
2049 | 0 | } |
2050 | 0 | found_dot = true; |
2051 | 0 | } else if (util::is_digit(*p)) { |
2052 | 0 | found_digit = true; |
2053 | 0 | } else { |
2054 | 0 | return false; |
2055 | 0 | } |
2056 | 0 | } |
2057 | 0 | return found_digit; |
2058 | 0 | } |
2059 | | |
2060 | | bool |
2061 | | QUtil::is_space(char c) |
2062 | 0 | { |
2063 | 0 | return util::is_space(c); |
2064 | 0 | } |
2065 | | |
2066 | | bool |
2067 | | QUtil::is_digit(char c) |
2068 | 0 | { |
2069 | 0 | return util::is_digit(c); |
2070 | 0 | } |
2071 | | |
2072 | | bool |
2073 | | QUtil::is_hex_digit(char c) |
2074 | 0 | { |
2075 | 0 | return util::is_hex_digit(c); |
2076 | 0 | } |