Coverage Report

Created: 2025-12-05 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QUtil.cc
Line
Count
Source
1
// Include qpdf-config.h first so off_t is guaranteed to have the right size.
2
#include <qpdf/qpdf-config.h>
3
4
#include <qpdf/QUtil.hh>
5
#include <qpdf/Util.hh>
6
7
#include <qpdf/CryptoRandomDataProvider.hh>
8
#include <qpdf/Pipeline.hh>
9
#include <qpdf/QIntC.hh>
10
#include <qpdf/QPDFSystemError.hh>
11
#include <qpdf/QTC.hh>
12
#include <qpdf/Util.hh>
13
14
#include <cerrno>
15
#include <cstdlib>
16
#include <cstring>
17
#include <fcntl.h>
18
#include <fstream>
19
#include <iomanip>
20
#include <map>
21
#include <memory>
22
#include <regex>
23
#include <set>
24
#include <sstream>
25
#include <stdexcept>
26
#ifndef QPDF_NO_WCHAR_T
27
# include <cwchar>
28
#endif
29
#ifdef _WIN32
30
# define WIN32_LEAN_AND_MEAN
31
# include <direct.h>
32
# include <io.h>
33
# include <windows.h>
34
#else
35
# include <sys/stat.h>
36
# include <unistd.h>
37
#endif
38
#ifdef HAVE_MALLOC_INFO
39
# include <malloc.h>
40
#endif
41
42
using namespace qpdf;
43
using namespace std::literals;
44
45
// Unicode code points for the byte values 0x18 through 0x1f, as listed in the per-entry comments.
// The table is indexed from byte value 24 (0x18): entry 0 is byte 24.
46
static unsigned short pdf_doc_low_to_unicode[] = {
47
    0x02d8, // 0x18    BREVE
48
    0x02c7, // 0x19    CARON
49
    0x02c6, // 0x1a    MODIFIER LETTER CIRCUMFLEX ACCENT
50
    0x02d9, // 0x1b    DOT ABOVE
51
    0x02dd, // 0x1c    DOUBLE ACUTE ACCENT
52
    0x02db, // 0x1d    OGONEK
53
    0x02da, // 0x1e    RING ABOVE
54
    0x02dc, // 0x1f    SMALL TILDE
55
};
56
// Unicode code points for the byte values 0x7f through 0xa0, as listed in the per-entry comments.
// The table is indexed from byte value 127 (0x7f): entry 0 is byte 127. Bytes marked UNDEFINED
// map to 0xfffd.
57
static unsigned short pdf_doc_to_unicode[] = {
58
    0xfffd, // 0x7f    UNDEFINED
59
    0x2022, // 0x80    BULLET
60
    0x2020, // 0x81    DAGGER
61
    0x2021, // 0x82    DOUBLE DAGGER
62
    0x2026, // 0x83    HORIZONTAL ELLIPSIS
63
    0x2014, // 0x84    EM DASH
64
    0x2013, // 0x85    EN DASH
65
    0x0192, // 0x86    SMALL LETTER F WITH HOOK
66
    0x2044, // 0x87    FRACTION SLASH (solidus)
67
    0x2039, // 0x88    SINGLE LEFT-POINTING ANGLE QUOTATION MARK
68
    0x203a, // 0x89    SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
69
    0x2212, // 0x8a    MINUS SIGN
70
    0x2030, // 0x8b    PER MILLE SIGN
71
    0x201e, // 0x8c    DOUBLE LOW-9 QUOTATION MARK (quotedblbase)
72
    0x201c, // 0x8d    LEFT DOUBLE QUOTATION MARK (double quote left)
73
    0x201d, // 0x8e    RIGHT DOUBLE QUOTATION MARK (quotedblright)
74
    0x2018, // 0x8f    LEFT SINGLE QUOTATION MARK (quoteleft)
75
    0x2019, // 0x90    RIGHT SINGLE QUOTATION MARK (quoteright)
76
    0x201a, // 0x91    SINGLE LOW-9 QUOTATION MARK (quotesinglbase)
77
    0x2122, // 0x92    TRADE MARK SIGN
78
    0xfb01, // 0x93    LATIN SMALL LIGATURE FI
79
    0xfb02, // 0x94    LATIN SMALL LIGATURE FL
80
    0x0141, // 0x95    LATIN CAPITAL LETTER L WITH STROKE
81
    0x0152, // 0x96    LATIN CAPITAL LIGATURE OE
82
    0x0160, // 0x97    LATIN CAPITAL LETTER S WITH CARON
83
    0x0178, // 0x98    LATIN CAPITAL LETTER Y WITH DIAERESIS
84
    0x017d, // 0x99    LATIN CAPITAL LETTER Z WITH CARON
85
    0x0131, // 0x9a    LATIN SMALL LETTER DOTLESS I
86
    0x0142, // 0x9b    LATIN SMALL LETTER L WITH STROKE
87
    0x0153, // 0x9c    LATIN SMALL LIGATURE OE
88
    0x0161, // 0x9d    LATIN SMALL LETTER S WITH CARON
89
    0x017e, // 0x9e    LATIN SMALL LETTER Z WITH CARON
90
    0xfffd, // 0x9f    UNDEFINED
91
    0x20ac, // 0xa0    EURO SIGN
92
};
93
// Unicode code points for the byte values 0x80 through 0xa0 (byte value shown in each entry's
// comment); entry 0 is byte 0x80. NOTE(review): the 0xfffd entries presumably mark bytes that
// have no assigned character -- confirm against the code that reads this table.
static unsigned short win_ansi_to_unicode[] = {
94
    0x20ac, // 0x80
95
    0xfffd, // 0x81
96
    0x201a, // 0x82
97
    0x0192, // 0x83
98
    0x201e, // 0x84
99
    0x2026, // 0x85
100
    0x2020, // 0x86
101
    0x2021, // 0x87
102
    0x02c6, // 0x88
103
    0x2030, // 0x89
104
    0x0160, // 0x8a
105
    0x2039, // 0x8b
106
    0x0152, // 0x8c
107
    0xfffd, // 0x8d
108
    0x017d, // 0x8e
109
    0xfffd, // 0x8f
110
    0xfffd, // 0x90
111
    0x2018, // 0x91
112
    0x2019, // 0x92
113
    0x201c, // 0x93
114
    0x201d, // 0x94
115
    0x2022, // 0x95
116
    0x2013, // 0x96
117
    0x2014, // 0x97
118
    0x0303, // 0x98
119
    0x2122, // 0x99
120
    0x0161, // 0x9a
121
    0x203a, // 0x9b
122
    0x0153, // 0x9c
123
    0xfffd, // 0x9d
124
    0x017e, // 0x9e
125
    0x0178, // 0x9f
126
    0x00a0, // 0xa0
127
};
128
// Unicode code points for the byte values 0x80 through 0xff (byte value shown in each entry's
// comment); entry 0 is byte 0x80. NOTE(review): the 0xfffd entries presumably mark bytes that
// have no assigned character -- confirm against the code that reads this table.
static unsigned short mac_roman_to_unicode[] = {
129
    0x00c4, // 0x80
130
    0x00c5, // 0x81
131
    0x00c7, // 0x82
132
    0x00c9, // 0x83
133
    0x00d1, // 0x84
134
    0x00d6, // 0x85
135
    0x00dc, // 0x86
136
    0x00e1, // 0x87
137
    0x00e0, // 0x88
138
    0x00e2, // 0x89
139
    0x00e4, // 0x8a
140
    0x00e3, // 0x8b
141
    0x00e5, // 0x8c
142
    0x00e7, // 0x8d
143
    0x00e9, // 0x8e
144
    0x00e8, // 0x8f
145
    0x00ea, // 0x90
146
    0x00eb, // 0x91
147
    0x00ed, // 0x92
148
    0x00ec, // 0x93
149
    0x00ee, // 0x94
150
    0x00ef, // 0x95
151
    0x00f1, // 0x96
152
    0x00f3, // 0x97
153
    0x00f2, // 0x98
154
    0x00f4, // 0x99
155
    0x00f6, // 0x9a
156
    0x00f5, // 0x9b
157
    0x00fa, // 0x9c
158
    0x00f9, // 0x9d
159
    0x00fb, // 0x9e
160
    0x00fc, // 0x9f
161
    0x2020, // 0xa0
162
    0x00b0, // 0xa1
163
    0x00a2, // 0xa2
164
    0x00a3, // 0xa3
165
    0x00a7, // 0xa4
166
    0x2022, // 0xa5
167
    0x00b6, // 0xa6
168
    0x00df, // 0xa7
169
    0x00ae, // 0xa8
170
    0x00a9, // 0xa9
171
    0x2122, // 0xaa
172
    0x0301, // 0xab
173
    0x0308, // 0xac
174
    0xfffd, // 0xad
175
    0x00c6, // 0xae
176
    0x00d8, // 0xaf
177
    0xfffd, // 0xb0
178
    0x00b1, // 0xb1
179
    0xfffd, // 0xb2
180
    0xfffd, // 0xb3
181
    0x00a5, // 0xb4
182
    0x03bc, // 0xb5
183
    0xfffd, // 0xb6
184
    0xfffd, // 0xb7
185
    0xfffd, // 0xb8
186
    0xfffd, // 0xb9
187
    0xfffd, // 0xba
188
    0x1d43, // 0xbb
189
    0x1d52, // 0xbc
190
    0xfffd, // 0xbd
191
    0x00e6, // 0xbe
192
    0x00f8, // 0xbf
193
    0x00bf, // 0xc0
194
    0x00a1, // 0xc1
195
    0x00ac, // 0xc2
196
    0xfffd, // 0xc3
197
    0x0192, // 0xc4
198
    0xfffd, // 0xc5
199
    0xfffd, // 0xc6
200
    0x00ab, // 0xc7
201
    0x00bb, // 0xc8
202
    0x2026, // 0xc9
203
    0xfffd, // 0xca
204
    0x00c0, // 0xcb
205
    0x00c3, // 0xcc
206
    0x00d5, // 0xcd
207
    0x0152, // 0xce
208
    0x0153, // 0xcf
209
    0x2013, // 0xd0
210
    0x2014, // 0xd1
211
    0x201c, // 0xd2
212
    0x201d, // 0xd3
213
    0x2018, // 0xd4
214
    0x2019, // 0xd5
215
    0x00f7, // 0xd6
216
    0xfffd, // 0xd7
217
    0x00ff, // 0xd8
218
    0x0178, // 0xd9
219
    0x2044, // 0xda
220
    0x00a4, // 0xdb
221
    0x2039, // 0xdc
222
    0x203a, // 0xdd
223
    0xfb01, // 0xde
224
    0xfb02, // 0xdf
225
    0x2021, // 0xe0
226
    0x00b7, // 0xe1
227
    0x201a, // 0xe2
228
    0x201e, // 0xe3
229
    0x2030, // 0xe4
230
    0x00c2, // 0xe5
231
    0x00ca, // 0xe6
232
    0x00c1, // 0xe7
233
    0x00cb, // 0xe8
234
    0x00c8, // 0xe9
235
    0x00cd, // 0xea
236
    0x00ce, // 0xeb
237
    0x00cf, // 0xec
238
    0x00cc, // 0xed
239
    0x00d3, // 0xee
240
    0x00d4, // 0xef
241
    0xfffd, // 0xf0
242
    0x00d2, // 0xf1
243
    0x00da, // 0xf2
244
    0x00db, // 0xf3
245
    0x00d9, // 0xf4
246
    0x0131, // 0xf5
247
    0x02c6, // 0xf6
248
    0x0303, // 0xf7
249
    0x0304, // 0xf8
250
    0x0306, // 0xf9
251
    0x0307, // 0xfa
252
    0x030a, // 0xfb
253
    0x0327, // 0xfc
254
    0x030b, // 0xfd
255
    0x0328, // 0xfe
256
    0x02c7, // 0xff
257
};
258
259
// Reverse lookup for win_ansi_to_unicode: key is a Unicode code point, value is the byte
// (0x80-0xa0) that the forward table maps to it. Bytes whose forward entry is 0xfffd have no
// entry here.
static std::map<unsigned long, unsigned char> unicode_to_win_ansi = {
260
    {0x20ac, 0x80}, {0x201a, 0x82}, {0x192, 0x83},  {0x201e, 0x84}, {0x2026, 0x85}, {0x2020, 0x86},
261
    {0x2021, 0x87}, {0x2c6, 0x88},  {0x2030, 0x89}, {0x160, 0x8a},  {0x2039, 0x8b}, {0x152, 0x8c},
262
    {0x17d, 0x8e},  {0x2018, 0x91}, {0x2019, 0x92}, {0x201c, 0x93}, {0x201d, 0x94}, {0x2022, 0x95},
263
    {0x2013, 0x96}, {0x2014, 0x97}, {0x303, 0x98},  {0x2122, 0x99}, {0x161, 0x9a},  {0x203a, 0x9b},
264
    {0x153, 0x9c},  {0x17e, 0x9e},  {0x178, 0x9f},  {0xa0, 0xa0},
265
};
266
// Reverse lookup for mac_roman_to_unicode: key is a Unicode code point, value is the byte
// (0x80-0xff) that the forward table maps to it. Bytes whose forward entry is 0xfffd have no
// entry here.
static std::map<unsigned long, unsigned char> unicode_to_mac_roman = {
267
    {0xc4, 0x80},   {0xc5, 0x81},   {0xc7, 0x82},   {0xc9, 0x83},   {0xd1, 0x84},   {0xd6, 0x85},
268
    {0xdc, 0x86},   {0xe1, 0x87},   {0xe0, 0x88},   {0xe2, 0x89},   {0xe4, 0x8a},   {0xe3, 0x8b},
269
    {0xe5, 0x8c},   {0xe7, 0x8d},   {0xe9, 0x8e},   {0xe8, 0x8f},   {0xea, 0x90},   {0xeb, 0x91},
270
    {0xed, 0x92},   {0xec, 0x93},   {0xee, 0x94},   {0xef, 0x95},   {0xf1, 0x96},   {0xf3, 0x97},
271
    {0xf2, 0x98},   {0xf4, 0x99},   {0xf6, 0x9a},   {0xf5, 0x9b},   {0xfa, 0x9c},   {0xf9, 0x9d},
272
    {0xfb, 0x9e},   {0xfc, 0x9f},   {0x2020, 0xa0}, {0xb0, 0xa1},   {0xa2, 0xa2},   {0xa3, 0xa3},
273
    {0xa7, 0xa4},   {0x2022, 0xa5}, {0xb6, 0xa6},   {0xdf, 0xa7},   {0xae, 0xa8},   {0xa9, 0xa9},
274
    {0x2122, 0xaa}, {0x301, 0xab},  {0x308, 0xac},  {0xc6, 0xae},   {0xd8, 0xaf},   {0xb1, 0xb1},
275
    {0xa5, 0xb4},   {0x3bc, 0xb5},  {0x1d43, 0xbb}, {0x1d52, 0xbc}, {0xe6, 0xbe},   {0xf8, 0xbf},
276
    {0xbf, 0xc0},   {0xa1, 0xc1},   {0xac, 0xc2},   {0x192, 0xc4},  {0xab, 0xc7},   {0xbb, 0xc8},
277
    {0x2026, 0xc9}, {0xc0, 0xcb},   {0xc3, 0xcc},   {0xd5, 0xcd},   {0x152, 0xce},  {0x153, 0xcf},
278
    {0x2013, 0xd0}, {0x2014, 0xd1}, {0x201c, 0xd2}, {0x201d, 0xd3}, {0x2018, 0xd4}, {0x2019, 0xd5},
279
    {0xf7, 0xd6},   {0xff, 0xd8},   {0x178, 0xd9},  {0x2044, 0xda}, {0xa4, 0xdb},   {0x2039, 0xdc},
280
    {0x203a, 0xdd}, {0xfb01, 0xde}, {0xfb02, 0xdf}, {0x2021, 0xe0}, {0xb7, 0xe1},   {0x201a, 0xe2},
281
    {0x201e, 0xe3}, {0x2030, 0xe4}, {0xc2, 0xe5},   {0xca, 0xe6},   {0xc1, 0xe7},   {0xcb, 0xe8},
282
    {0xc8, 0xe9},   {0xcd, 0xea},   {0xce, 0xeb},   {0xcf, 0xec},   {0xcc, 0xed},   {0xd3, 0xee},
283
    {0xd4, 0xef},   {0xd2, 0xf1},   {0xda, 0xf2},   {0xdb, 0xf3},   {0xd9, 0xf4},   {0x131, 0xf5},
284
    {0x2c6, 0xf6},  {0x303, 0xf7},  {0x304, 0xf8},  {0x306, 0xf9},  {0x307, 0xfa},  {0x30a, 0xfb},
285
    {0x327, 0xfc},  {0x30b, 0xfd},  {0x328, 0xfe},  {0x2c7, 0xff},
286
};
287
// Reverse lookup for pdf_doc_low_to_unicode and pdf_doc_to_unicode: key is a Unicode code point,
// value is the byte (0x18-0x1f or 0x80-0xa0) that the forward tables map to it.
// NOTE(review): unlike the other reverse maps, this one contains an entry for 0xfffd (-> 0x9f),
// a byte the forward table marks UNDEFINED; there is no entry for 0x7f. Confirm this is intended.
static std::map<unsigned long, unsigned char> unicode_to_pdf_doc = {
288
    {0x02d8, 0x18}, {0x02c7, 0x19}, {0x02c6, 0x1a}, {0x02d9, 0x1b}, {0x02dd, 0x1c}, {0x02db, 0x1d},
289
    {0x02da, 0x1e}, {0x02dc, 0x1f}, {0x2022, 0x80}, {0x2020, 0x81}, {0x2021, 0x82}, {0x2026, 0x83},
290
    {0x2014, 0x84}, {0x2013, 0x85}, {0x0192, 0x86}, {0x2044, 0x87}, {0x2039, 0x88}, {0x203a, 0x89},
291
    {0x2212, 0x8a}, {0x2030, 0x8b}, {0x201e, 0x8c}, {0x201c, 0x8d}, {0x201d, 0x8e}, {0x2018, 0x8f},
292
    {0x2019, 0x90}, {0x201a, 0x91}, {0x2122, 0x92}, {0xfb01, 0x93}, {0xfb02, 0x94}, {0x0141, 0x95},
293
    {0x0152, 0x96}, {0x0160, 0x97}, {0x0178, 0x98}, {0x017d, 0x99}, {0x0131, 0x9a}, {0x0142, 0x9b},
294
    {0x0153, 0x9c}, {0x0161, 0x9d}, {0x017e, 0x9e}, {0xfffd, 0x9f}, {0x20ac, 0xa0},
295
};
296
297
// Convert an integer to text in base 8, 10, or 16 (lowercase digits) and pad it to abs(length)
// characters.
//
// num:    value to convert; T is long long or unsigned long long for the callers in this file.
// base:   must be 8, 10, or 16; any other value throws std::logic_error.
// length: Backward compatibility -- int_to_string, which calls this function, used to use sprintf
//         with %0*d, so a positive value prepends zeroes and a negative value appends spaces.
//         Zero, or a width no larger than the converted text, adds nothing.
//
// Zero padding is placed after a leading '-', so (-5, 10, 4) yields "-005" as sprintf did rather
// than "00-5".
template <typename T>
static std::string
int_to_string_base_internal(T num, int base, int length)
{
    if (!((base == 8) || (base == 10) || (base == 16))) {
        throw std::logic_error("int_to_string_base called with unsupported base");
    }
    std::string cvt;
    if (base == 10) {
        // Use the more efficient std::to_string when possible
        cvt = std::to_string(num);
    } else {
        std::ostringstream buf;
        buf.imbue(std::locale::classic());
        buf << std::setbase(base) << std::nouppercase << num;
        cvt = buf.str();
    }
    // Widen before negating so that length == INT_MIN cannot overflow.
    auto const wide_length = static_cast<long long>(length);
    auto const width = static_cast<size_t>(wide_length < 0 ? -wide_length : wide_length);
    if (cvt.length() >= width) {
        return cvt;
    }
    size_t const padding = width - cvt.length();
    if (length > 0) {
        // Keep the sign in front of the zeroes. Only base 10 can produce a '-' here.
        size_t const sign_length = (cvt.front() == '-') ? 1 : 0;
        cvt.insert(sign_length, padding, '0');
    } else {
        cvt.append(padding, ' ');
    }
    return cvt;
}
QUtil.cc:std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > int_to_string_base_internal<long long>(long long, int, int)
Line
Count
Source
300
1.06M
{
301
    // Backward compatibility -- int_to_string, which calls this function, used to use sprintf with
302
    // %0*d, so we interpret length such that a negative value appends spaces and a positive value
303
    // prepends zeroes.
304
1.06M
    if (!((base == 8) || (base == 10) || (base == 16))) {
305
0
        throw std::logic_error("int_to_string_base called with unsupported base");
306
0
    }
307
1.06M
    std::string cvt;
308
1.06M
    if (base == 10) {
309
        // Use the more efficient std::to_string when possible
310
1.06M
        cvt = std::to_string(num);
311
1.06M
    } else {
312
0
        std::ostringstream buf;
313
0
        buf.imbue(std::locale::classic());
314
0
        buf << std::setbase(base) << std::nouppercase << num;
315
0
        cvt = buf.str();
316
0
    }
317
1.06M
    std::string result;
318
1.06M
    int str_length = QIntC::to_int(cvt.length());
319
1.06M
    if ((length > 0) && (str_length < length)) {
320
0
        result.append(QIntC::to_size(length - str_length), '0');
321
0
    }
322
1.06M
    result += cvt;
323
1.06M
    if ((length < 0) && (str_length < -length)) {
324
0
        result.append(QIntC::to_size(-length - str_length), ' ');
325
0
    }
326
1.06M
    return result;
327
1.06M
}
Unexecuted instantiation: QUtil.cc:std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > int_to_string_base_internal<unsigned long long>(unsigned long long, int, int)
328
329
std::string
330
QUtil::int_to_string(long long num, int length)
331
1.06M
{
332
1.06M
    return int_to_string_base(num, 10, length);
333
1.06M
}
334
335
std::string
336
QUtil::uint_to_string(unsigned long long num, int length)
337
0
{
338
0
    return uint_to_string_base(num, 10, length);
339
0
}
340
341
std::string
342
QUtil::int_to_string_base(long long num, int base, int length)
343
1.06M
{
344
1.06M
    return int_to_string_base_internal(num, base, length);
345
1.06M
}
346
347
std::string
348
QUtil::uint_to_string_base(unsigned long long num, int base, int length)
349
0
{
350
0
    return int_to_string_base_internal(num, base, length);
351
0
}
352
353
std::string
354
QUtil::double_to_string(double num, int decimal_places, bool trim_trailing_zeroes)
355
2.41k
{
356
    // Backward compatibility -- this code used to use sprintf and treated decimal_places <= 0 to
357
    // mean to use the default, which was six decimal places. Starting in 10.2, we trim trailing
358
    // zeroes by default.
359
2.41k
    if (decimal_places <= 0) {
360
2.41k
        decimal_places = 6;
361
2.41k
    }
362
2.41k
    std::ostringstream buf;
363
2.41k
    buf.imbue(std::locale::classic());
364
2.41k
    buf << std::setprecision(decimal_places) << std::fixed << num;
365
2.41k
    std::string result = buf.str();
366
2.41k
    if (trim_trailing_zeroes) {
367
15.6k
        while ((result.length() > 1) && (result.back() == '0')) {
368
13.2k
            result.pop_back();
369
13.2k
        }
370
2.41k
        if ((result.length() > 1) && (result.back() == '.')) {
371
2.11k
            result.pop_back();
372
2.11k
        }
373
2.41k
    }
374
2.41k
    return result;
375
2.41k
}
376
377
long long
378
QUtil::string_to_ll(char const* str)
379
2.84M
{
380
2.84M
    errno = 0;
381
#ifdef _MSC_VER
382
    long long result = _strtoi64(str, 0, 10);
383
#else
384
2.84M
    long long result = strtoll(str, nullptr, 10);
385
2.84M
#endif
386
2.84M
    if (errno == ERANGE) {
387
1.13k
        throw std::range_error(
388
1.13k
            std::string("overflow/underflow converting ") + str + " to 64-bit integer");
389
1.13k
    }
390
2.84M
    return result;
391
2.84M
}
392
393
int
394
QUtil::string_to_int(char const* str)
395
698k
{
396
    // QIntC::to_int does range checking
397
698k
    return QIntC::to_int(string_to_ll(str));
398
698k
}
399
400
unsigned long long
401
QUtil::string_to_ull(char const* str)
402
0
{
403
0
    char const* p = str;
404
0
    while (*p && util::is_space(*p)) {
405
0
        ++p;
406
0
    }
407
0
    if (*p == '-') {
408
0
        throw std::runtime_error(
409
0
            std::string("underflow converting ") + str + " to 64-bit unsigned integer");
410
0
    }
411
412
0
    errno = 0;
413
#ifdef _MSC_VER
414
    unsigned long long result = _strtoui64(str, 0, 10);
415
#else
416
0
    unsigned long long result = strtoull(str, nullptr, 10);
417
0
#endif
418
0
    if (errno == ERANGE) {
419
0
        throw std::runtime_error(
420
0
            std::string("overflow converting ") + str + " to 64-bit unsigned integer");
421
0
    }
422
0
    return result;
423
0
}
424
425
unsigned int
426
QUtil::string_to_uint(char const* str)
427
0
{
428
    // QIntC::to_uint does range checking
429
0
    return QIntC::to_uint(string_to_ull(str));
430
0
}
431
432
bool
433
QUtil::is_long_long(char const* str)
434
1.06M
{
435
1.06M
    try {
436
1.06M
        auto i1 = string_to_ll(str);
437
1.06M
        std::string s1 = int_to_string(i1);
438
1.06M
        return str == s1;
439
1.06M
    } catch (std::exception&) {
440
        // overflow or other error
441
1.11k
    }
442
1.11k
    return false;
443
1.06M
}
444
445
unsigned char*
446
QUtil::unsigned_char_pointer(std::string const& str)
447
0
{
448
0
    return reinterpret_cast<unsigned char*>(const_cast<char*>(str.c_str()));
449
0
}
450
451
unsigned char*
452
QUtil::unsigned_char_pointer(char const* str)
453
0
{
454
0
    return reinterpret_cast<unsigned char*>(const_cast<char*>(str));
455
0
}
456
457
void
458
QUtil::throw_system_error(std::string const& description)
459
0
{
460
0
    throw QPDFSystemError(description, errno);
461
0
}
462
463
int
464
QUtil::os_wrapper(std::string const& description, int status)
465
0
{
466
0
    if (status == -1) {
467
0
        throw_system_error(description);
468
0
    }
469
0
    return status;
470
0
}
471
472
#ifdef _WIN32
473
static std::shared_ptr<wchar_t>
474
win_convert_filename(char const* filename)
475
{
476
    // Convert the utf-8 encoded filename argument to wchar_t*. First,
477
    // convert to utf16, then to wchar_t*. Note that u16 will start
478
    // with the UTF16 marker, which we skip.
479
    std::string u16 = QUtil::utf8_to_utf16(filename);
480
    size_t len = u16.length();
481
    size_t wlen = (len / 2) - 1;
482
    auto wfilenamep = QUtil::make_shared_array<wchar_t>(wlen + 1);
483
    wchar_t* wfilename = wfilenamep.get();
484
    wfilename[wlen] = 0;
485
    for (unsigned int i = 2; i < len; i += 2) {
486
        wfilename[(i / 2) - 1] = static_cast<wchar_t>(
487
            (static_cast<unsigned char>(u16.at(i)) << 8) +
488
            static_cast<unsigned char>(u16.at(i + 1)));
489
    }
490
    return wfilenamep;
491
}
492
#endif
493
494
FILE*
495
QUtil::safe_fopen(char const* filename, char const* mode)
496
0
{
497
0
    FILE* f = nullptr;
498
#ifdef _WIN32
499
    std::shared_ptr<wchar_t> wfilenamep = win_convert_filename(filename);
500
    wchar_t* wfilename = wfilenamep.get();
501
    auto wmodep = QUtil::make_shared_array<wchar_t>(strlen(mode) + 1);
502
    wchar_t* wmode = wmodep.get();
503
    wmode[strlen(mode)] = 0;
504
    for (size_t i = 0; i < strlen(mode); ++i) {
505
        wmode[i] = static_cast<wchar_t>(mode[i]);
506
    }
507
508
# ifdef _MSC_VER
509
    errno_t err = _wfopen_s(&f, wfilename, wmode);
510
    if (err != 0) {
511
        errno = err;
512
    }
513
# else
514
    f = _wfopen(wfilename, wmode);
515
# endif
516
    if (f == 0) {
517
        throw_system_error(std::string("open ") + filename);
518
    }
519
#else
520
0
    f = fopen_wrapper(std::string("open ") + filename, fopen(filename, mode));
521
0
#endif
522
0
    return f;
523
0
}
524
525
FILE*
526
QUtil::fopen_wrapper(std::string const& description, FILE* f)
527
0
{
528
0
    if (f == nullptr) {
529
0
        throw_system_error(description);
530
0
    }
531
0
    return f;
532
0
}
533
534
bool
535
QUtil::file_can_be_opened(char const* filename)
536
0
{
537
0
    try {
538
0
        fclose(safe_fopen(filename, "rb"));
539
0
        return true;
540
0
    } catch (std::runtime_error&) {
541
        // can't open the file
542
0
    }
543
0
    return false;
544
0
}
545
546
int
547
QUtil::seek(FILE* stream, qpdf_offset_t offset, int whence)
548
0
{
549
0
#if HAVE_FSEEKO
550
0
    return fseeko(stream, QIntC::IntConverter<qpdf_offset_t, off_t>::convert(offset), whence);
551
#elif HAVE_FSEEKO64
552
    return fseeko64(stream, offset, whence);
553
#else
554
# if defined _MSC_VER || defined __BORLANDC__
555
    return _fseeki64(stream, offset, whence);
556
# else
557
    return fseek(stream, QIntC::to_long(offset), whence);
558
# endif
559
#endif
560
0
}
561
562
// Report the current position of stream using the tell call that matches the seek call selected
// by the build configuration, returned as a qpdf_offset_t.
qpdf_offset_t
QUtil::tell(FILE* stream)
{
#if HAVE_FSEEKO
    return QIntC::to_offset(ftello(stream));
#elif HAVE_FSEEKO64
    return QIntC::to_offset(ftello64(stream));
#elif defined _MSC_VER || defined __BORLANDC__
    return _ftelli64(stream);
#else
    return QIntC::to_offset(ftell(stream));
#endif
}
577
578
bool
579
QUtil::same_file(char const* name1, char const* name2)
580
0
{
581
0
    if ((name1 == nullptr) || (strlen(name1) == 0) || (name2 == nullptr) || (strlen(name2) == 0)) {
582
0
        return false;
583
0
    }
584
#ifdef _WIN32
585
    bool same = false;
586
# ifndef AVOID_WINDOWS_HANDLE
587
    HANDLE fh1 = CreateFile(
588
        name1, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
589
    HANDLE fh2 = CreateFile(
590
        name2, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
591
    BY_HANDLE_FILE_INFORMATION fi1;
592
    BY_HANDLE_FILE_INFORMATION fi2;
593
    if ((fh1 != INVALID_HANDLE_VALUE) && (fh2 != INVALID_HANDLE_VALUE) &&
594
        GetFileInformationByHandle(fh1, &fi1) && GetFileInformationByHandle(fh2, &fi2) &&
595
        (fi1.dwVolumeSerialNumber == fi2.dwVolumeSerialNumber) &&
596
        (fi1.nFileIndexLow == fi2.nFileIndexLow) && (fi1.nFileIndexHigh == fi2.nFileIndexHigh)) {
597
        same = true;
598
    }
599
    if (fh1 != INVALID_HANDLE_VALUE) {
600
        CloseHandle(fh1);
601
    }
602
    if (fh2 != INVALID_HANDLE_VALUE) {
603
        CloseHandle(fh2);
604
    }
605
# endif
606
    return same;
607
#else
608
0
    struct stat st1;
609
0
    struct stat st2;
610
0
    if ((stat(name1, &st1) == 0) && (stat(name2, &st2) == 0) && (st1.st_ino == st2.st_ino) &&
611
0
        (st1.st_dev == st2.st_dev)) {
612
0
        return true;
613
0
    }
614
0
#endif
615
0
    return false;
616
0
}
617
618
void
619
QUtil::remove_file(char const* path)
620
0
{
621
#ifdef _WIN32
622
    std::shared_ptr<wchar_t> wpath = win_convert_filename(path);
623
    os_wrapper(std::string("remove ") + path, _wunlink(wpath.get()));
624
#else
625
0
    os_wrapper(std::string("remove ") + path, unlink(path));
626
0
#endif
627
0
}
628
629
void
630
QUtil::rename_file(char const* oldname, char const* newname)
631
0
{
632
#ifdef _WIN32
633
    try {
634
        remove_file(newname);
635
    } catch (QPDFSystemError&) {
636
        // ignore
637
    }
638
    std::shared_ptr<wchar_t> wold = win_convert_filename(oldname);
639
    std::shared_ptr<wchar_t> wnew = win_convert_filename(newname);
640
    os_wrapper(std::string("rename ") + oldname + " " + newname, _wrename(wold.get(), wnew.get()));
641
#else
642
0
    os_wrapper(std::string("rename ") + oldname + " " + newname, rename(oldname, newname));
643
0
#endif
644
0
}
645
646
void
647
QUtil::pipe_file(char const* filename, Pipeline* p)
648
0
{
649
    // Exercised in test suite by testing file_provider.
650
0
    FILE* f = safe_fopen(filename, "rb");
651
0
    FileCloser fc(f);
652
0
    size_t len = 0;
653
0
    int constexpr size = 8192;
654
0
    unsigned char buf[size];
655
0
    while ((len = fread(buf, 1, size, f)) > 0) {
656
0
        p->write(buf, len);
657
0
    }
658
0
    p->finish();
659
0
    if (ferror(f)) {
660
0
        throw std::runtime_error(std::string("failure reading file ") + filename);
661
0
    }
662
0
}
663
664
std::function<void(Pipeline*)>
665
QUtil::file_provider(std::string const& filename)
666
1.17k
{
667
1.17k
    return [filename](Pipeline* p) { pipe_file(filename.c_str(), p); };
668
1.17k
}
669
670
std::string
671
QUtil::path_basename(std::string const& filename)
672
0
{
673
#ifdef _WIN32
674
    char const* pathsep = "/\\";
675
#else
676
0
    char const* pathsep = "/";
677
0
#endif
678
0
    std::string last = filename;
679
0
    auto len = last.length();
680
0
    while (len > 1) {
681
0
        auto pos = last.find_last_of(pathsep);
682
0
        if (pos == len - 1) {
683
0
            last.pop_back();
684
0
            --len;
685
0
        } else if (pos == std::string::npos) {
686
0
            break;
687
0
        } else {
688
0
            last = last.substr(pos + 1);
689
0
            break;
690
0
        }
691
0
    }
692
0
    return last;
693
0
}
694
695
char*
696
QUtil::copy_string(std::string const& str)
697
0
{
698
0
    char* result = new char[str.length() + 1];
699
    // Use memcpy in case string contains nulls
700
0
    result[str.length()] = '\0';
701
0
    memcpy(result, str.c_str(), str.length());
702
0
    return result;
703
0
}
704
705
std::shared_ptr<char>
706
QUtil::make_shared_cstr(std::string const& str)
707
0
{
708
0
    auto result = QUtil::make_shared_array<char>(str.length() + 1);
709
    // Use memcpy in case string contains nulls
710
0
    result.get()[str.length()] = '\0';
711
0
    memcpy(result.get(), str.c_str(), str.length());
712
0
    return result;
713
0
}
714
715
std::unique_ptr<char[]>
716
QUtil::make_unique_cstr(std::string const& str)
717
0
{
718
0
    auto result = std::make_unique<char[]>(str.length() + 1);
719
    // Use memcpy in case string contains nulls
720
0
    result.get()[str.length()] = '\0';
721
0
    memcpy(result.get(), str.c_str(), str.length());
722
0
    return result;
723
0
}
724
725
std::string
726
QUtil::hex_encode(std::string const& input)
727
0
{
728
0
    static auto constexpr hexchars = "0123456789abcdef";
729
0
    std::string result;
730
0
    result.reserve(2 * input.length());
731
0
    for (const char c: input) {
732
0
        result += hexchars[static_cast<unsigned char>(c) >> 4];
733
0
        result += hexchars[c & 0x0f];
734
0
    }
735
0
    return result;
736
0
}
737
738
std::string
739
QUtil::hex_decode(std::string const& input)
740
1.10k
{
741
1.10k
    std::string result;
742
    // We know result.size() <= 0.5 * input.size() + 1. However, reserving string space for this
743
    // upper bound has a negative impact.
744
1.10k
    bool first = true;
745
1.10k
    char decoded;
746
23.0k
    for (auto ch: input) {
747
23.0k
        ch = util::hex_decode_char(ch);
748
23.0k
        if (ch < '\20') {
749
23.0k
            if (first) {
750
11.5k
                decoded = static_cast<char>(ch << 4);
751
11.5k
                first = false;
752
11.5k
            } else {
753
11.5k
                result.push_back(decoded | ch);
754
11.5k
                first = true;
755
11.5k
            }
756
23.0k
        }
757
23.0k
    }
758
1.10k
    if (!first) {
759
0
        result.push_back(decoded);
760
0
    }
761
1.10k
    return result;
762
1.10k
}
763
764
void
765
QUtil::binary_stdout()
766
0
{
767
#if defined(_WIN32) && defined(__BORLANDC__)
768
    setmode(_fileno(stdout), _O_BINARY);
769
#elif defined(_WIN32)
770
    _setmode(_fileno(stdout), _O_BINARY);
771
#endif
772
0
}
773
774
void
775
QUtil::binary_stdin()
776
0
{
777
#if defined(_WIN32) && defined(__BORLANDC__)
778
    setmode(_fileno(stdin), _O_BINARY);
779
#elif defined(_WIN32)
780
    _setmode(_fileno(stdin), _O_BINARY);
781
#endif
782
0
}
783
784
void
785
QUtil::setLineBuf(FILE* f)
786
0
{
787
0
#ifndef _WIN32
788
0
    setvbuf(f, reinterpret_cast<char*>(0), _IOLBF, 0);
789
0
#endif
790
0
}
791
792
char*
793
QUtil::getWhoami(char* argv0)
794
0
{
795
0
    char* whoami = nullptr;
796
0
    if (((whoami = strrchr(argv0, '/')) == nullptr) &&
797
0
        ((whoami = strrchr(argv0, '\\')) == nullptr)) {
798
0
        whoami = argv0;
799
0
    } else {
800
0
        ++whoami;
801
0
    }
802
803
0
    if ((strlen(whoami) > 4) && (strcmp(whoami + strlen(whoami) - 4, ".exe") == 0)) {
804
0
        whoami[strlen(whoami) - 4] = '\0';
805
0
    }
806
807
0
    return whoami;
808
0
}
809
810
bool
811
QUtil::get_env(std::string const& var, std::string* value)
812
0
{
813
    // This was basically ripped out of wxWindows.
814
#ifdef _WIN32
815
# ifdef NO_GET_ENVIRONMENT
816
    return false;
817
# else
818
    // first get the size of the buffer
819
    DWORD len = ::GetEnvironmentVariable(var.c_str(), NULL, 0);
820
    if (len == 0) {
821
        // this means that there is no such variable
822
        return false;
823
    }
824
825
    if (value) {
826
        auto t = QUtil::make_shared_array<char>(len + 1);
827
        ::GetEnvironmentVariable(var.c_str(), t.get(), len);
828
        *value = t.get();
829
    }
830
831
    return true;
832
# endif
833
#else
834
0
    char* p = getenv(var.c_str());
835
0
    if (p == nullptr) {
836
0
        return false;
837
0
    }
838
0
    if (value) {
839
0
        *value = p;
840
0
    }
841
842
0
    return true;
843
0
#endif
844
0
}
845
846
time_t
847
QUtil::get_current_time()
848
0
{
849
#ifdef _WIN32
850
    // The procedure to get local time at this resolution comes from
851
    // the Microsoft documentation.  It says to convert a SYSTEMTIME
852
    // to a FILETIME, and to copy the FILETIME to a ULARGE_INTEGER.
853
    // The resulting number is the number of 100-nanosecond intervals
854
    // between January 1, 1601 and now.  POSIX threads wants a time
855
    // based on January 1, 1970, so we adjust by subtracting the
856
    // number of seconds in that time period from the result we get
857
    // here.
858
    SYSTEMTIME sysnow;
859
    GetSystemTime(&sysnow);
860
    FILETIME filenow;
861
    SystemTimeToFileTime(&sysnow, &filenow);
862
    ULARGE_INTEGER uinow;
863
    uinow.LowPart = filenow.dwLowDateTime;
864
    uinow.HighPart = filenow.dwHighDateTime;
865
    ULONGLONG now = uinow.QuadPart;
866
    return static_cast<time_t>((now / 10000000ULL) - 11644473600ULL);
867
#else
868
0
    return time(nullptr);
869
0
#endif
870
0
}
871
872
// Return the current local date and time as a QPDFTime, together with the local time zone's
// offset expressed in minutes before UTC (positive west of UTC).
//
// On Windows the offset is Bias plus whichever of DaylightBias or StandardBias is in effect
// according to GetTimeZoneInformation, so it agrees with the DST-adjusted local time returned by
// GetLocalTime. Elsewhere the offset comes from tm_gmtoff when available; the fallback based on
// the global `timezone` is not adjusted for daylight saving time, and platforms with neither
// report an offset of 0.
QUtil::QPDFTime
QUtil::get_current_qpdf_time()
{
#ifdef _WIN32
    SYSTEMTIME ltime;
    GetLocalTime(&ltime);
    // Zero-initialize so that a failed query reports an offset of 0 rather than garbage.
    TIME_ZONE_INFORMATION tzinfo = {};
    DWORD const tz_id = GetTimeZoneInformation(&tzinfo);
    // tzinfo.Bias is minutes before UTC, excluding the standard/daylight adjustment
    LONG bias = tzinfo.Bias;
    if (tz_id == TIME_ZONE_ID_DAYLIGHT) {
        bias += tzinfo.DaylightBias;
    } else if (tz_id == TIME_ZONE_ID_STANDARD) {
        bias += tzinfo.StandardBias;
    }
    return QPDFTime(
        static_cast<int>(ltime.wYear),
        static_cast<int>(ltime.wMonth),
        static_cast<int>(ltime.wDay),
        static_cast<int>(ltime.wHour),
        static_cast<int>(ltime.wMinute),
        static_cast<int>(ltime.wSecond),
        static_cast<int>(bias));
#else
    struct tm ltime;
    time_t now = time(nullptr);
    tzset();
# ifdef HAVE_LOCALTIME_R
    localtime_r(&now, &ltime);
# else
    ltime = *localtime(&now);
# endif
# if HAVE_TM_GMTOFF
    // tm_gmtoff is seconds after UTC
    int tzoff = -static_cast<int>(ltime.tm_gmtoff / 60);
# elif HAVE_EXTERN_LONG_TIMEZONE
    // timezone is seconds before UTC, not adjusted for daylight saving time
    int tzoff = static_cast<int>(timezone / 60);
# else
    // Don't know how to get timezone on this platform
    int tzoff = 0;
# endif
    // struct tm counts years from 1900 and months from 0.
    return {
        static_cast<int>(ltime.tm_year + 1900),
        static_cast<int>(ltime.tm_mon + 1),
        static_cast<int>(ltime.tm_mday),
        static_cast<int>(ltime.tm_hour),
        static_cast<int>(ltime.tm_min),
        static_cast<int>(ltime.tm_sec),
        tzoff};
#endif
}
918
919
std::string
920
QUtil::qpdf_time_to_pdf_time(QPDFTime const& qtm)
921
0
{
922
0
    std::string tz_offset;
923
0
    int t = qtm.tz_delta;
924
0
    if (t == 0) {
925
0
        tz_offset = "Z";
926
0
    } else {
927
0
        if (t < 0) {
928
0
            t = -t;
929
0
            tz_offset += "+";
930
0
        } else {
931
0
            tz_offset += "-";
932
0
        }
933
0
        tz_offset += QUtil::int_to_string(t / 60, 2) + "'" + QUtil::int_to_string(t % 60, 2) + "'";
934
0
    }
935
0
    return (
936
0
        "D:" + QUtil::int_to_string(qtm.year, 4) + QUtil::int_to_string(qtm.month, 2) +
937
0
        QUtil::int_to_string(qtm.day, 2) + QUtil::int_to_string(qtm.hour, 2) +
938
0
        QUtil::int_to_string(qtm.minute, 2) + QUtil::int_to_string(qtm.second, 2) + tz_offset);
939
0
}
940
941
std::string
942
QUtil::qpdf_time_to_iso8601(QPDFTime const& qtm)
943
0
{
944
0
    std::string tz_offset;
945
0
    int t = qtm.tz_delta;
946
0
    if (t == 0) {
947
0
        tz_offset = "Z";
948
0
    } else {
949
0
        if (t < 0) {
950
0
            t = -t;
951
0
            tz_offset += "+";
952
0
        } else {
953
0
            tz_offset += "-";
954
0
        }
955
0
        tz_offset += QUtil::int_to_string(t / 60, 2) + ":" + QUtil::int_to_string(t % 60, 2);
956
0
    }
957
0
    return (
958
0
        QUtil::int_to_string(qtm.year, 4) + "-" + QUtil::int_to_string(qtm.month, 2) + "-" +
959
0
        QUtil::int_to_string(qtm.day, 2) + "T" + QUtil::int_to_string(qtm.hour, 2) + ":" +
960
0
        QUtil::int_to_string(qtm.minute, 2) + ":" + QUtil::int_to_string(qtm.second, 2) +
961
0
        tz_offset);
962
0
}
963
964
bool
965
QUtil::pdf_time_to_qpdf_time(std::string const& str, QPDFTime* qtm)
966
0
{
967
0
    static std::regex pdf_date(
968
0
        "^D:([0-9]{4})([0-9]{2})([0-9]{2})"
969
0
        "([0-9]{2})([0-9]{2})([0-9]{2})"
970
0
        "(?:(Z?)|([\\+\\-])([0-9]{2})'([0-9]{2})')$");
971
0
    std::smatch m;
972
0
    if (!std::regex_match(str, m, pdf_date)) {
973
0
        return false;
974
0
    }
975
0
    int tz_delta = 0;
976
0
    auto to_i = [](std::string const& s) { return QUtil::string_to_int(s.c_str()); };
977
978
0
    if (m[8] != "") {
979
0
        tz_delta = ((to_i(m[9]) * 60) + to_i(m[10]));
980
0
        if (m[8] == "+") {
981
0
            tz_delta = -tz_delta;
982
0
        }
983
0
    }
984
0
    if (qtm) {
985
0
        *qtm = QPDFTime(
986
0
            to_i(m[1]), to_i(m[2]), to_i(m[3]), to_i(m[4]), to_i(m[5]), to_i(m[6]), tz_delta);
987
0
    }
988
0
    return true;
989
0
}
990
991
bool
992
QUtil::pdf_time_to_iso8601(std::string const& pdf_time, std::string& iso8601)
993
0
{
994
0
    QPDFTime qtm;
995
0
    if (pdf_time_to_qpdf_time(pdf_time, &qtm)) {
996
0
        iso8601 = qpdf_time_to_iso8601(qtm);
997
0
        return true;
998
0
    }
999
0
    return false;
1000
0
}
1001
1002
std::string
1003
QUtil::toUTF8(unsigned long uval)
1004
29.9k
{
1005
29.9k
    std::string result;
1006
1007
    // A UTF-8 encoding of a Unicode value is a single byte for Unicode values <= 127.  For larger
1008
    // values, the first byte of the UTF-8 encoding has '1' as each of its n highest bits and '0'
1009
    // for its (n+1)th highest bit where n is the total number of bytes required.  Subsequent bytes
1010
    // start with '10' and have the remaining 6 bits free for encoding.  For example, an 11-bit
1011
    // Unicode value can be stored in two bytes where the first is 110zzzzz, the second is 10zzzzzz,
1012
    // and the z's represent the remaining bits.
1013
1014
29.9k
    if (uval > 0x7fffffff) {
1015
0
        throw std::runtime_error("bounds error in QUtil::toUTF8");
1016
29.9k
    } else if (uval < 128) {
1017
20.9k
        result += static_cast<char>(uval);
1018
20.9k
    } else {
1019
8.98k
        unsigned char bytes[7];
1020
8.98k
        bytes[6] = '\0';
1021
8.98k
        unsigned char* cur_byte = &bytes[5];
1022
1023
        // maximum value that will fit in the current number of bytes
1024
8.98k
        unsigned char maxval = 0x3f; // six bits
1025
1026
26.2k
        while (uval > QIntC::to_ulong(maxval)) {
1027
            // Assign low six bits plus 10000000 to lowest unused byte position, then shift
1028
17.2k
            *cur_byte = static_cast<unsigned char>(0x80 + (uval & 0x3f));
1029
17.2k
            uval >>= 6;
1030
            // Maximum that will fit in high byte now shrinks by one bit
1031
17.2k
            maxval = static_cast<unsigned char>(maxval >> 1);
1032
            // Slide to the left one byte
1033
17.2k
            if (cur_byte <= bytes) {
1034
0
                throw std::logic_error("QUtil::toUTF8: overflow error");
1035
0
            }
1036
17.2k
            --cur_byte;
1037
17.2k
        }
1038
        // If maxval is k bits long, the high (7 - k) bits of the resulting byte must be high.
1039
8.98k
        *cur_byte = static_cast<unsigned char>(QIntC::to_ulong(0xff - (1 + (maxval << 1))) + uval);
1040
1041
8.98k
        result += reinterpret_cast<char*>(cur_byte);
1042
8.98k
    }
1043
1044
29.9k
    return result;
1045
29.9k
}
1046
1047
std::string
1048
QUtil::toUTF16(unsigned long uval)
1049
14.5M
{
1050
14.5M
    std::string result;
1051
14.5M
    if ((uval >= 0xd800) && (uval <= 0xdfff)) {
1052
215
        result = "\xff\xfd";
1053
14.5M
    } else if (uval <= 0xffff) {
1054
14.5M
        char out[2];
1055
14.5M
        out[0] = static_cast<char>((uval & 0xff00) >> 8);
1056
14.5M
        out[1] = static_cast<char>(uval & 0xff);
1057
14.5M
        result = std::string(out, 2);
1058
14.5M
    } else if (uval <= 0x10ffff) {
1059
1.42k
        char out[4];
1060
1.42k
        uval -= 0x10000;
1061
1.42k
        unsigned short high = static_cast<unsigned short>(((uval & 0xffc00) >> 10) + 0xd800);
1062
1.42k
        unsigned short low = static_cast<unsigned short>((uval & 0x3ff) + 0xdc00);
1063
1.42k
        out[0] = static_cast<char>((high & 0xff00) >> 8);
1064
1.42k
        out[1] = static_cast<char>(high & 0xff);
1065
1.42k
        out[2] = static_cast<char>((low & 0xff00) >> 8);
1066
1.42k
        out[3] = static_cast<char>(low & 0xff);
1067
1.42k
        result = std::string(out, 4);
1068
2.99k
    } else {
1069
2.99k
        result = "\xff\xfd";
1070
2.99k
    }
1071
1072
14.5M
    return result;
1073
14.5M
}
1074
1075
// Random data support
1076
1077
namespace
1078
{
1079
    class RandomDataProviderProvider
1080
    {
1081
      public:
1082
        RandomDataProviderProvider();
1083
        void setProvider(RandomDataProvider*);
1084
        RandomDataProvider* getProvider();
1085
1086
      private:
1087
        RandomDataProvider* default_provider;
1088
        RandomDataProvider* current_provider{nullptr};
1089
    };
1090
} // namespace
1091
1092
RandomDataProviderProvider::RandomDataProviderProvider() :
1093
0
    default_provider(CryptoRandomDataProvider::getInstance())
1094
0
{
1095
0
    this->current_provider = default_provider;
1096
0
}
1097
1098
RandomDataProvider*
1099
RandomDataProviderProvider::getProvider()
1100
0
{
1101
0
    return this->current_provider;
1102
0
}
1103
1104
void
1105
RandomDataProviderProvider::setProvider(RandomDataProvider* p)
1106
0
{
1107
0
    this->current_provider = p ? p : this->default_provider;
1108
0
}
1109
1110
// Return the single shared RandomDataProviderProvider, creating it on first use.
static RandomDataProviderProvider*
getRandomDataProviderProvider()
{
    // Thread-safe static initializer
    static RandomDataProviderProvider the_registry;
    RandomDataProviderProvider* const registry = &the_registry;
    return registry;
}
1117
1118
void
1119
QUtil::setRandomDataProvider(RandomDataProvider* p)
1120
0
{
1121
0
    getRandomDataProviderProvider()->setProvider(p);
1122
0
}
1123
1124
// Return the provider currently used for random data.
RandomDataProvider*
QUtil::getRandomDataProvider()
{
    auto* registry = getRandomDataProviderProvider();
    return registry->getProvider();
}
1129
1130
void
1131
QUtil::initializeWithRandomBytes(unsigned char* data, size_t len)
1132
0
{
1133
0
    getRandomDataProvider()->provideRandomData(data, len);
1134
0
}
1135
1136
std::string
1137
util::random_string(size_t len)
1138
0
{
1139
0
    std::string result(len, '\0');
1140
0
    QUtil::initializeWithRandomBytes(reinterpret_cast<unsigned char*>(result.data()), len);
1141
0
    return result;
1142
0
}
1143
1144
long
1145
QUtil::random()
1146
0
{
1147
0
    long result = 0L;
1148
0
    initializeWithRandomBytes(reinterpret_cast<unsigned char*>(&result), sizeof(result));
1149
0
    return result;
1150
0
}
1151
1152
void
1153
QUtil::read_file_into_memory(char const* filename, std::shared_ptr<char>& file_buf, size_t& size)
1154
0
{
1155
0
    FILE* f = safe_fopen(filename, "rb");
1156
0
    FileCloser fc(f);
1157
0
    fseek(f, 0, SEEK_END);
1158
0
    size = QIntC::to_size(QUtil::tell(f));
1159
0
    fseek(f, 0, SEEK_SET);
1160
0
    file_buf = QUtil::make_shared_array<char>(size);
1161
0
    char* buf_p = file_buf.get();
1162
0
    size_t bytes_read = 0;
1163
0
    size_t len = 0;
1164
0
    while ((len = fread(buf_p + bytes_read, 1, size - bytes_read, f)) > 0) {
1165
0
        bytes_read += len;
1166
0
    }
1167
0
    if (bytes_read != size) {
1168
0
        if (ferror(f)) {
1169
0
            throw std::runtime_error(
1170
0
                std::string("failure reading file ") + filename + " into memory: read " +
1171
0
                uint_to_string(bytes_read) + "; wanted " + uint_to_string(size));
1172
0
        } else {
1173
0
            throw std::runtime_error(
1174
0
                std::string("premature eof reading file ") + filename + " into memory: read " +
1175
0
                uint_to_string(bytes_read) + "; wanted " + uint_to_string(size));
1176
0
        }
1177
0
    }
1178
0
}
1179
1180
std::string
1181
QUtil::read_file_into_string(char const* filename)
1182
0
{
1183
0
    FILE* f = safe_fopen(filename, "rb");
1184
0
    FileCloser fc(f);
1185
0
    return read_file_into_string(f, filename);
1186
0
}
1187
1188
std::string
1189
QUtil::read_file_into_string(FILE* f, std::string_view filename)
1190
0
{
1191
0
    fseek(f, 0, SEEK_END);
1192
0
    auto o_size = QUtil::tell(f);
1193
0
    if (o_size >= 0) {
1194
        // Seekable file
1195
0
        auto size = QIntC::to_size(o_size);
1196
0
        fseek(f, 0, SEEK_SET);
1197
0
        std::string result(size, '\0');
1198
0
        if (auto n_read = fread(result.data(), 1, size, f); n_read != size) {
1199
0
            if (ferror(f)) {
1200
0
                throw std::runtime_error(
1201
0
                    std::string("failure reading file ") + std::string(filename) +
1202
0
                    " into memory: read " + uint_to_string(n_read) + "; wanted " +
1203
0
                    uint_to_string(size));
1204
0
            } else {
1205
0
                throw std::runtime_error(
1206
0
                    std::string("premature eof reading file ") + std::string(filename) +
1207
0
                    " into memory: read " + uint_to_string(n_read) + "; wanted " +
1208
0
                    uint_to_string(size));
1209
0
            }
1210
0
        }
1211
0
        return result;
1212
0
    } else {
1213
        // Pipe or other non-seekable file
1214
0
        size_t buf_size = 8192;
1215
0
        auto n_read = buf_size;
1216
0
        std::string buffer(buf_size, '\0');
1217
0
        std::string result;
1218
0
        while (n_read == buf_size) {
1219
0
            n_read = fread(buffer.data(), 1, buf_size, f);
1220
0
            buffer.erase(n_read);
1221
0
            result.append(buffer);
1222
0
        }
1223
0
        if (ferror(f)) {
1224
0
            throw std::runtime_error(
1225
0
                std::string("failure reading file ") + std::string(filename) + " into memory");
1226
0
        }
1227
0
        return result;
1228
0
    }
1229
0
}
1230
1231
// Read one character from f into ch. Returns true if a character was read and false at end of
// file; throws std::runtime_error if the stream is in an error state after a failed read.
static bool
read_char_from_FILE(char& ch, FILE* f)
{
    if (fread(&ch, 1, 1, f) != 0) {
        return true;
    }
    // Nothing was read: tell an I/O error apart from an ordinary end of file.
    if (ferror(f)) {
        throw std::runtime_error("failure reading character from file");
    }
    return false;
}
1243
1244
std::list<std::string>
1245
QUtil::read_lines_from_file(char const* filename, bool preserve_eol)
1246
0
{
1247
0
    std::list<std::string> lines;
1248
0
    FILE* f = safe_fopen(filename, "rb");
1249
0
    FileCloser fc(f);
1250
0
    auto next_char = [&f](char& ch) { return read_char_from_FILE(ch, f); };
1251
0
    read_lines_from_file(next_char, lines, preserve_eol);
1252
0
    return lines;
1253
0
}
1254
1255
std::list<std::string>
1256
QUtil::read_lines_from_file(std::istream& in, bool preserve_eol)
1257
0
{
1258
0
    std::list<std::string> lines;
1259
0
    auto next_char = [&in](char& ch) { return (in.get(ch)) ? true : false; };
1260
0
    read_lines_from_file(next_char, lines, preserve_eol);
1261
0
    return lines;
1262
0
}
1263
1264
std::list<std::string>
1265
QUtil::read_lines_from_file(FILE* f, bool preserve_eol)
1266
0
{
1267
0
    std::list<std::string> lines;
1268
0
    auto next_char = [&f](char& ch) { return read_char_from_FILE(ch, f); };
1269
0
    read_lines_from_file(next_char, lines, preserve_eol);
1270
0
    return lines;
1271
0
}
1272
1273
void
1274
QUtil::read_lines_from_file(
1275
    std::function<bool(char&)> next_char, std::list<std::string>& lines, bool preserve_eol)
1276
0
{
1277
0
    std::string* buf = nullptr;
1278
0
    char c;
1279
0
    while (next_char(c)) {
1280
0
        if (buf == nullptr) {
1281
0
            lines.emplace_back("");
1282
0
            buf = &(lines.back());
1283
0
            buf->reserve(80);
1284
0
        }
1285
1286
0
        if (buf->capacity() == buf->size()) {
1287
0
            buf->reserve(buf->capacity() * 2);
1288
0
        }
1289
0
        if (c == '\n') {
1290
0
            if (preserve_eol) {
1291
0
                buf->append(1, c);
1292
0
            } else {
1293
                // Remove any carriage return that preceded the newline and discard the newline
1294
0
                if ((!buf->empty()) && ((*(buf->rbegin())) == '\r')) {
1295
0
                    buf->erase(buf->length() - 1);
1296
0
                }
1297
0
            }
1298
0
            buf = nullptr;
1299
0
        } else {
1300
0
            buf->append(1, c);
1301
0
        }
1302
0
    }
1303
0
}
1304
1305
int
1306
QUtil::str_compare_nocase(char const* s1, char const* s2)
1307
0
{
1308
#if defined(_WIN32) && defined(__BORLANDC__)
1309
    return stricmp(s1, s2);
1310
#elif defined(_WIN32)
1311
    return _stricmp(s1, s2);
1312
#else
1313
0
    return strcasecmp(s1, s2);
1314
0
#endif
1315
0
}
1316
1317
std::vector<int>
1318
QUtil::parse_numrange(char const* range, int max)
1319
0
{
1320
    // Performance note: this implementation aims to be straightforward, not efficient. Numeric
1321
    // range parsing is used only during argument processing. It is not used during processing of
1322
    // PDF files.
1323
1324
0
    static std::regex group_re(R"((x)?(z|r?\d+)(?:-(z|r?\d+))?)");
1325
0
    auto parse_num = [&max](std::string const& s) -> int {
1326
0
        if (s == "z") {
1327
0
            return max;
1328
0
        }
1329
0
        int num;
1330
0
        if (s.at(0) == 'r') {
1331
0
            num = max + 1 - string_to_int(s.substr(1).c_str());
1332
0
        } else {
1333
0
            num = string_to_int(s.c_str());
1334
0
        }
1335
        // max == 0 means we don't know the max and are just testing for valid syntax.
1336
0
        if ((max > 0) && ((num < 1) || (num > max))) {
1337
0
            throw std::runtime_error("number " + std::to_string(num) + " out of range");
1338
0
        }
1339
0
        return num;
1340
0
    };
1341
1342
0
    auto populate = [](std::vector<int>& group, int first_num, bool is_span, int last_num) {
1343
0
        group.clear();
1344
0
        group.emplace_back(first_num);
1345
0
        if (is_span) {
1346
0
            if (first_num > last_num) {
1347
0
                for (auto i = first_num - 1; i >= last_num; --i) {
1348
0
                    group.push_back(i);
1349
0
                }
1350
0
            } else {
1351
0
                for (auto i = first_num + 1; i <= last_num; ++i) {
1352
0
                    group.push_back(i);
1353
0
                }
1354
0
            }
1355
0
        }
1356
0
    };
1357
1358
0
    char const* p;
1359
0
    try {
1360
0
        char const* range_end = range + strlen(range);
1361
0
        std::vector<int> result;
1362
0
        std::vector<int> last_group;
1363
        // See if range ends with :even or :odd.
1364
0
        size_t start_idx = 0;
1365
0
        size_t skip = 1;
1366
0
        p = std::find(range, range_end, ':');
1367
0
        if (*p == ':') {
1368
0
            if (strcmp(p, ":odd") == 0) {
1369
0
                skip = 2;
1370
0
            } else if (strcmp(p, ":even") == 0) {
1371
0
                skip = 2;
1372
0
                start_idx = 1;
1373
0
            } else {
1374
0
                throw std::runtime_error("expected :even or :odd");
1375
0
            }
1376
0
            range_end = p;
1377
0
        }
1378
1379
        // Divide the range into groups
1380
0
        p = range;
1381
0
        char const* group_end;
1382
0
        bool first = true;
1383
0
        while (p != range_end) {
1384
0
            group_end = std::find(p, range_end, ',');
1385
0
            std::cmatch m;
1386
0
            if (!std::regex_match(p, group_end, m, group_re)) {
1387
0
                throw std::runtime_error("invalid range syntax");
1388
0
            }
1389
0
            auto is_exclude = m[1].matched;
1390
0
            if (first && is_exclude) {
1391
0
                throw std::runtime_error("first range group may not be an exclusion");
1392
0
            }
1393
0
            first = false;
1394
0
            auto first_num = parse_num(m[2].str());
1395
0
            auto is_span = m[3].matched;
1396
0
            int last_num{0};
1397
0
            if (is_span) {
1398
0
                last_num = parse_num(m[3].str());
1399
0
            }
1400
0
            if (is_exclude) {
1401
0
                std::vector<int> work;
1402
0
                populate(work, first_num, is_span, last_num);
1403
0
                std::set<int> exclusions;
1404
0
                exclusions.insert(work.begin(), work.end());
1405
0
                work = last_group;
1406
0
                last_group.clear();
1407
0
                for (auto n: work) {
1408
0
                    if (!exclusions.contains(n)) {
1409
0
                        last_group.emplace_back(n);
1410
0
                    }
1411
0
                }
1412
0
            } else {
1413
0
                result.insert(result.end(), last_group.begin(), last_group.end());
1414
0
                populate(last_group, first_num, is_span, last_num);
1415
0
            }
1416
0
            p = group_end;
1417
0
            if (*p == ',') {
1418
0
                ++p;
1419
0
                if (p == range_end) {
1420
0
                    throw std::runtime_error("trailing comma");
1421
0
                }
1422
0
            }
1423
0
        }
1424
0
        result.insert(result.end(), last_group.begin(), last_group.end());
1425
0
        if (skip == 1) {
1426
0
            return result;
1427
0
        }
1428
0
        std::vector<int> filtered;
1429
0
        for (auto i = start_idx; i < result.size(); i += skip) {
1430
0
            filtered.emplace_back(result.at(i));
1431
0
        }
1432
0
        return filtered;
1433
0
    } catch (std::runtime_error const& e) {
1434
0
        std::string message;
1435
0
        if (p) {
1436
0
            message = "error at * in numeric range " +
1437
0
                std::string(range, QIntC::to_size(p - range)) + "*" + p + ": " + e.what();
1438
0
        } else {
1439
0
            message = "error in numeric range " + std::string(range) + ": " + e.what();
1440
0
        }
1441
0
        throw std::runtime_error(message);
1442
0
    }
1443
0
}
1444
1445
// Target encodings that transcode_utf8 can convert UTF-8 input into.
enum encoding_e { e_utf16, e_ascii, e_winansi, e_macroman, e_pdfdoc };
1446
1447
static unsigned char
1448
encode_winansi(unsigned long codepoint)
1449
0
{
1450
0
    auto i = unicode_to_win_ansi.find(codepoint);
1451
0
    if (i != unicode_to_win_ansi.end()) {
1452
0
        return i->second;
1453
0
    }
1454
0
    return '\0';
1455
0
}
1456
1457
static unsigned char
1458
encode_macroman(unsigned long codepoint)
1459
0
{
1460
0
    auto i = unicode_to_mac_roman.find(codepoint);
1461
0
    if (i != unicode_to_mac_roman.end()) {
1462
0
        return i->second;
1463
0
    }
1464
0
    return '\0';
1465
0
}
1466
1467
static unsigned char
1468
encode_pdfdoc(unsigned long codepoint)
1469
10.5k
{
1470
10.5k
    auto i = unicode_to_pdf_doc.find(codepoint);
1471
10.5k
    if (i != unicode_to_pdf_doc.end()) {
1472
566
        return i->second;
1473
566
    }
1474
10.0k
    return '\0';
1475
10.5k
}
1476
1477
unsigned long
1478
QUtil::get_next_utf8_codepoint(std::string const& utf8_val, size_t& pos, bool& error)
1479
30.5M
{
1480
30.5M
    auto o_pos = pos;
1481
30.5M
    size_t len = utf8_val.length();
1482
30.5M
    unsigned char ch = static_cast<unsigned char>(utf8_val.at(pos++));
1483
30.5M
    error = false;
1484
30.5M
    if (ch < 128) {
1485
29.0M
        return static_cast<unsigned long>(ch);
1486
29.0M
    }
1487
1488
1.45M
    size_t bytes_needed = 0;
1489
1.45M
    unsigned bit_check = 0x40;
1490
1.45M
    unsigned char to_clear = 0x80;
1491
5.28M
    while (ch & bit_check) {
1492
3.82M
        ++bytes_needed;
1493
3.82M
        to_clear = static_cast<unsigned char>(to_clear | bit_check);
1494
3.82M
        bit_check >>= 1;
1495
3.82M
    }
1496
1.45M
    if (((bytes_needed > 5) || (bytes_needed < 1)) || ((pos + bytes_needed) > len)) {
1497
1.07M
        error = true;
1498
1.07M
        return 0xfffd;
1499
1.07M
    }
1500
1501
378k
    auto codepoint = static_cast<unsigned long>(ch & ~to_clear);
1502
446k
    while (bytes_needed > 0) {
1503
412k
        --bytes_needed;
1504
412k
        ch = static_cast<unsigned char>(utf8_val.at(pos++));
1505
412k
        if ((ch & 0xc0) != 0x80) {
1506
345k
            --pos;
1507
345k
            error = true;
1508
345k
            return 0xfffd;
1509
345k
        }
1510
67.6k
        codepoint <<= 6;
1511
67.6k
        codepoint += (ch & 0x3f);
1512
67.6k
    }
1513
33.4k
    unsigned long lower_bound = 0;
1514
33.4k
    switch (pos - o_pos) {
1515
19.8k
    case 2:
1516
19.8k
        lower_bound = 1 << 7;
1517
19.8k
        break;
1518
4.65k
    case 3:
1519
4.65k
        lower_bound = 1 << 11;
1520
4.65k
        break;
1521
3.39k
    case 4:
1522
3.39k
        lower_bound = 1 << 16;
1523
3.39k
        break;
1524
4.28k
    case 5:
1525
4.28k
        lower_bound = 1 << 12;
1526
4.28k
        break;
1527
1.21k
    case 6:
1528
1.21k
        lower_bound = 1 << 26;
1529
1.21k
        break;
1530
0
    default:
1531
0
        lower_bound = 0;
1532
33.4k
    }
1533
1534
33.4k
    if (lower_bound > 0 && codepoint < lower_bound) {
1535
        // Too many bytes were used, but return whatever character was encoded.
1536
644
        error = true;
1537
644
    }
1538
33.4k
    return codepoint;
1539
33.4k
}
1540
1541
static bool
1542
transcode_utf8(std::string const& utf8_val, std::string& result, encoding_e encoding, char unknown)
1543
9.23k
{
1544
9.23k
    bool okay = true;
1545
9.23k
    result.clear();
1546
9.23k
    size_t len = utf8_val.length();
1547
9.23k
    switch (encoding) {
1548
3.67k
    case e_utf16:
1549
3.67k
        result += "\xfe\xff";
1550
3.67k
        break;
1551
5.56k
    case e_pdfdoc:
1552
        // We need to avoid having the result start with something that will be interpreted as
1553
        // UTF-16 or UTF-8, meaning we can't end up with a string that starts with "fe ff",
1554
        // (UTF-16-BE) "ff fe" (UTF-16-LE, not officially part of the PDF spec, but recognized by
1555
        // most readers including qpdf), or "ef bb bf" (UTF-8). It's more efficient to check the
1556
        // input string to see if it will map to one of those sequences than to check the output
1557
        // string since all cases start with the same starting character.
1558
5.56k
        if ((len >= 4) && (utf8_val[0] == '\xc3')) {
1559
2.17k
            static std::string fe_ff("\xbe\xc3\xbf");
1560
2.17k
            static std::string ff_fe("\xbf\xc3\xbe");
1561
2.17k
            static std::string ef_bb_bf("\xaf\xc2\xbb\xc2\xbf");
1562
            // C++-20 has starts_with, but when this was written, qpdf had a minimum supported
1563
            // version of C++-17.
1564
2.17k
            if ((utf8_val.compare(1, 3, fe_ff) == 0) || (utf8_val.compare(1, 3, ff_fe) == 0) ||
1565
1.78k
                (utf8_val.compare(1, 5, ef_bb_bf) == 0)) {
1566
733
                result += unknown;
1567
733
                okay = false;
1568
733
            }
1569
2.17k
        }
1570
5.56k
        break;
1571
0
    default:
1572
0
        break;
1573
9.23k
    }
1574
9.23k
    size_t pos = 0;
1575
30.5M
    while (pos < len) {
1576
30.5M
        bool error = false;
1577
30.5M
        unsigned long codepoint = QUtil::get_next_utf8_codepoint(utf8_val, pos, error);
1578
30.5M
        if (error) {
1579
1.42M
            okay = false;
1580
1.42M
            if (encoding == e_utf16) {
1581
710k
                result += "\xff\xfd";
1582
710k
            } else {
1583
710k
                result.append(1, unknown);
1584
710k
            }
1585
29.1M
        } else if (codepoint < 128) {
1586
29.0M
            char ch = static_cast<char>(codepoint);
1587
29.0M
            if (encoding == e_utf16) {
1588
14.5M
                result += QUtil::toUTF16(QIntC::to_ulong(ch));
1589
14.5M
            } else if ((encoding == e_pdfdoc) && (((ch >= 0x18) && (ch <= 0x1f)) || (ch == 127))) {
1590
                // PDFDocEncoding maps some low characters to Unicode, so if we encounter those
1591
                // invalid UTF-8 code points, map them to unknown so reversing the mapping doesn't
1592
                // change them into other characters.
1593
3.91k
                okay = false;
1594
3.91k
                result.append(1, unknown);
1595
14.5M
            } else {
1596
14.5M
                result.append(1, ch);
1597
14.5M
            }
1598
29.0M
        } else if (encoding == e_utf16) {
1599
15.8k
            result += QUtil::toUTF16(codepoint);
1600
16.9k
        } else if ((codepoint == 0xad) && (encoding == e_pdfdoc)) {
1601
            // PDFDocEncoding omits 0x00ad (soft hyphen).
1602
543
            okay = false;
1603
543
            result.append(1, unknown);
1604
16.4k
        } else if (
1605
16.4k
            (codepoint > 160) && (codepoint < 256) &&
1606
5.87k
            ((encoding == e_winansi) || (encoding == e_pdfdoc))) {
1607
5.87k
            result.append(1, static_cast<char>(codepoint & 0xff));
1608
10.5k
        } else {
1609
10.5k
            unsigned char ch = '\0';
1610
10.5k
            if (encoding == e_winansi) {
1611
0
                ch = encode_winansi(codepoint);
1612
10.5k
            } else if (encoding == e_macroman) {
1613
0
                ch = encode_macroman(codepoint);
1614
10.5k
            } else if (encoding == e_pdfdoc) {
1615
10.5k
                ch = encode_pdfdoc(codepoint);
1616
10.5k
            }
1617
10.5k
            if (ch == '\0') {
1618
10.0k
                okay = false;
1619
10.0k
                ch = static_cast<unsigned char>(unknown);
1620
10.0k
            }
1621
10.5k
            result.append(1, static_cast<char>(ch));
1622
10.5k
        }
1623
30.5M
    }
1624
9.23k
    return okay;
1625
9.23k
}
1626
1627
static std::string
1628
transcode_utf8(std::string const& utf8_val, encoding_e encoding, char unknown)
1629
3.67k
{
1630
3.67k
    std::string result;
1631
3.67k
    transcode_utf8(utf8_val, result, encoding, unknown);
1632
3.67k
    return result;
1633
3.67k
}
1634
1635
std::string
1636
QUtil::utf8_to_utf16(std::string const& utf8)
1637
3.67k
{
1638
3.67k
    return transcode_utf8(utf8, e_utf16, 0);
1639
3.67k
}
1640
1641
std::string
1642
QUtil::utf8_to_ascii(std::string const& utf8, char unknown_char)
1643
0
{
1644
0
    return transcode_utf8(utf8, e_ascii, unknown_char);
1645
0
}
1646
1647
std::string
1648
QUtil::utf8_to_win_ansi(std::string const& utf8, char unknown_char)
1649
0
{
1650
0
    return transcode_utf8(utf8, e_winansi, unknown_char);
1651
0
}
1652
1653
std::string
1654
QUtil::utf8_to_mac_roman(std::string const& utf8, char unknown_char)
1655
0
{
1656
0
    return transcode_utf8(utf8, e_macroman, unknown_char);
1657
0
}
1658
1659
std::string
1660
QUtil::utf8_to_pdf_doc(std::string const& utf8, char unknown_char)
1661
0
{
1662
0
    return transcode_utf8(utf8, e_pdfdoc, unknown_char);
1663
0
}
1664
1665
bool
1666
QUtil::utf8_to_ascii(std::string const& utf8, std::string& ascii, char unknown_char)
1667
0
{
1668
0
    return transcode_utf8(utf8, ascii, e_ascii, unknown_char);
1669
0
}
1670
1671
bool
1672
QUtil::utf8_to_win_ansi(std::string const& utf8, std::string& win, char unknown_char)
1673
0
{
1674
0
    return transcode_utf8(utf8, win, e_winansi, unknown_char);
1675
0
}
1676
1677
bool
1678
QUtil::utf8_to_mac_roman(std::string const& utf8, std::string& mac, char unknown_char)
1679
0
{
1680
0
    return transcode_utf8(utf8, mac, e_macroman, unknown_char);
1681
0
}
1682
1683
bool
1684
QUtil::utf8_to_pdf_doc(std::string const& utf8, std::string& pdfdoc, char unknown_char)
1685
5.56k
{
1686
5.56k
    return transcode_utf8(utf8, pdfdoc, e_pdfdoc, unknown_char);
1687
5.56k
}
1688
1689
bool
1690
QUtil::is_utf16(std::string const& val)
1691
0
{
1692
0
    return util::is_utf16(val);
1693
0
}
1694
1695
bool
1696
QUtil::is_explicit_utf8(std::string const& val)
1697
0
{
1698
0
    return util::is_explicit_utf8(val);
1699
0
}
1700
1701
std::string
1702
QUtil::utf16_to_utf8(std::string const& val)
1703
0
{
1704
0
    std::string result;
1705
    // This code uses unsigned long and unsigned short to hold codepoint values. It requires
1706
    // unsigned long to be at least 32 bits and unsigned short to be at least 16 bits, but it will
1707
    // work fine if they are larger.
1708
0
    unsigned long codepoint = 0L;
1709
0
    size_t len = val.length();
1710
0
    size_t start = 0;
1711
0
    bool is_le = false;
1712
0
    if (is_utf16(val)) {
1713
0
        if (static_cast<unsigned char>(val.at(0)) == 0xff) {
1714
0
            is_le = true;
1715
0
        }
1716
0
        start += 2;
1717
0
    }
1718
    // If the string has an odd number of bytes, the last byte is ignored.
1719
0
    for (size_t i = start; i + 1 < len; i += 2) {
1720
        // Convert from UTF16-BE.  If we get a malformed codepoint, this code will generate
1721
        // incorrect output without giving a warning.  Specifically, a high codepoint not followed
1722
        // by a low codepoint will be discarded, and a low codepoint not preceded by a high
1723
        // codepoint will just get its low 10 bits output.
1724
0
        auto msb = is_le ? i + 1 : i;
1725
0
        auto lsb = is_le ? i : i + 1;
1726
0
        unsigned short bits = QIntC::to_ushort(
1727
0
            (static_cast<unsigned char>(val.at(msb)) << 8) +
1728
0
            static_cast<unsigned char>(val.at(lsb)));
1729
0
        if ((bits & 0xFC00) == 0xD800) {
1730
0
            codepoint = 0x10000U + ((bits & 0x3FFU) << 10U);
1731
0
            continue;
1732
0
        } else if ((bits & 0xFC00) == 0xDC00) {
1733
0
            if (codepoint != 0) {
1734
0
                QTC::TC("qpdf", "QUtil non-trivial UTF-16");
1735
0
            }
1736
0
            codepoint += bits & 0x3FF;
1737
0
        } else {
1738
0
            codepoint = bits;
1739
0
        }
1740
1741
0
        result += QUtil::toUTF8(codepoint);
1742
0
        codepoint = 0;
1743
0
    }
1744
0
    return result;
1745
0
}
1746
1747
std::string
1748
QUtil::win_ansi_to_utf8(std::string const& val)
1749
0
{
1750
0
    std::string result;
1751
0
    size_t len = val.length();
1752
0
    for (unsigned int i = 0; i < len; ++i) {
1753
0
        unsigned char ch = static_cast<unsigned char>(val.at(i));
1754
0
        unsigned short ch_short = ch;
1755
0
        if ((ch >= 128) && (ch <= 160)) {
1756
0
            ch_short = win_ansi_to_unicode[ch - 128];
1757
0
        }
1758
0
        result += QUtil::toUTF8(ch_short);
1759
0
    }
1760
0
    return result;
1761
0
}
1762
1763
std::string
1764
QUtil::mac_roman_to_utf8(std::string const& val)
1765
0
{
1766
0
    std::string result;
1767
0
    size_t len = val.length();
1768
0
    for (unsigned int i = 0; i < len; ++i) {
1769
0
        unsigned char ch = static_cast<unsigned char>(val.at(i));
1770
0
        unsigned short ch_short = ch;
1771
0
        if (ch >= 128) {
1772
0
            ch_short = mac_roman_to_unicode[ch - 128];
1773
0
        }
1774
0
        result += QUtil::toUTF8(ch_short);
1775
0
    }
1776
0
    return result;
1777
0
}
1778
1779
std::string
1780
QUtil::pdf_doc_to_utf8(std::string const& val)
1781
0
{
1782
0
    std::string result;
1783
0
    size_t len = val.length();
1784
0
    for (unsigned int i = 0; i < len; ++i) {
1785
0
        unsigned char ch = static_cast<unsigned char>(val.at(i));
1786
0
        unsigned short ch_short = ch;
1787
0
        if ((ch >= 127) && (ch <= 160)) {
1788
0
            ch_short = pdf_doc_to_unicode[ch - 127];
1789
0
        } else if ((ch >= 24) && (ch <= 31)) {
1790
0
            ch_short = pdf_doc_low_to_unicode[ch - 24];
1791
0
        } else if (ch == 173) {
1792
0
            ch_short = 0xfffd;
1793
0
        }
1794
0
        result += QUtil::toUTF8(ch_short);
1795
0
    }
1796
0
    return result;
1797
0
}
1798
1799
void
1800
QUtil::analyze_encoding(
1801
    std::string const& val, bool& has_8bit_chars, bool& is_valid_utf8, bool& is_utf16)
1802
0
{
1803
0
    has_8bit_chars = is_utf16 = is_valid_utf8 = false;
1804
0
    if (QUtil::is_utf16(val)) {
1805
0
        has_8bit_chars = true;
1806
0
        is_utf16 = true;
1807
0
        return;
1808
0
    }
1809
0
    size_t len = val.length();
1810
0
    size_t pos = 0;
1811
0
    bool any_errors = false;
1812
0
    while (pos < len) {
1813
0
        bool error = false;
1814
0
        auto o_pos = pos;
1815
0
        get_next_utf8_codepoint(val, pos, error);
1816
0
        if (error) {
1817
0
            any_errors = true;
1818
0
        }
1819
0
        if (pos - o_pos > 1 || val[o_pos] & 0x80) {
1820
0
            has_8bit_chars = true;
1821
0
        }
1822
0
    }
1823
0
    if (has_8bit_chars && (!any_errors)) {
1824
0
        is_valid_utf8 = true;
1825
0
    }
1826
0
}
1827
1828
std::vector<std::string>
1829
QUtil::possible_repaired_encodings(std::string supplied)
1830
0
{
1831
0
    std::vector<std::string> result;
1832
    // Always include the original string
1833
0
    result.push_back(supplied);
1834
0
    bool has_8bit_chars = false;
1835
0
    bool is_valid_utf8 = false;
1836
0
    bool is_utf16 = false;
1837
0
    analyze_encoding(supplied, has_8bit_chars, is_valid_utf8, is_utf16);
1838
0
    if (!has_8bit_chars) {
1839
0
        return result;
1840
0
    }
1841
0
    if (is_utf16) {
1842
        // Convert to UTF-8 and pretend we got a UTF-8 string.
1843
0
        is_utf16 = false;
1844
0
        is_valid_utf8 = true;
1845
0
        supplied = utf16_to_utf8(supplied);
1846
0
    }
1847
0
    std::string output;
1848
0
    if (is_valid_utf8) {
1849
        // Maybe we were given UTF-8 but wanted one of the single-byte encodings.
1850
0
        if (utf8_to_pdf_doc(supplied, output)) {
1851
0
            result.push_back(output);
1852
0
        }
1853
0
        if (utf8_to_win_ansi(supplied, output)) {
1854
0
            result.push_back(output);
1855
0
        }
1856
0
        if (utf8_to_mac_roman(supplied, output)) {
1857
0
            result.push_back(output);
1858
0
        }
1859
0
    } else {
1860
        // Maybe we were given one of the single-byte encodings but wanted UTF-8.
1861
0
        std::string from_pdf_doc(pdf_doc_to_utf8(supplied));
1862
0
        result.push_back(from_pdf_doc);
1863
0
        std::string from_win_ansi(win_ansi_to_utf8(supplied));
1864
0
        result.push_back(from_win_ansi);
1865
0
        std::string from_mac_roman(mac_roman_to_utf8(supplied));
1866
0
        result.push_back(from_mac_roman);
1867
1868
        // Maybe we were given one of the other single-byte encodings but wanted one of the other
1869
        // ones.
1870
0
        if (utf8_to_win_ansi(from_pdf_doc, output)) {
1871
0
            result.push_back(output);
1872
0
        }
1873
0
        if (utf8_to_mac_roman(from_pdf_doc, output)) {
1874
0
            result.push_back(output);
1875
0
        }
1876
0
        if (utf8_to_pdf_doc(from_win_ansi, output)) {
1877
0
            result.push_back(output);
1878
0
        }
1879
0
        if (utf8_to_mac_roman(from_win_ansi, output)) {
1880
0
            result.push_back(output);
1881
0
        }
1882
0
        if (utf8_to_pdf_doc(from_mac_roman, output)) {
1883
0
            result.push_back(output);
1884
0
        }
1885
0
        if (utf8_to_win_ansi(from_mac_roman, output)) {
1886
0
            result.push_back(output);
1887
0
        }
1888
0
    }
1889
    // De-duplicate
1890
0
    std::vector<std::string> t;
1891
0
    std::set<std::string> seen;
1892
0
    for (auto const& iter: result) {
1893
0
        if (!seen.contains(iter)) {
1894
0
            seen.insert(iter);
1895
0
            t.push_back(iter);
1896
0
        }
1897
0
    }
1898
0
    return t;
1899
0
}
1900
1901
#ifndef QPDF_NO_WCHAR_T
1902
static int
1903
call_main_from_wmain(
1904
    bool, int argc, wchar_t const* const argv[], std::function<int(int, char*[])> realmain)
1905
0
{
1906
    // argv contains UTF-16-encoded strings with a 16-bit wchar_t. Convert this to UTF-8-encoded
1907
    // strings for compatibility with other systems. That way the rest of qpdf.cc can just act like
1908
    // arguments are UTF-8.
1909
1910
0
    std::vector<std::string> utf8_argv;
1911
0
    utf8_argv.reserve(QIntC::to_size(argc));
1912
0
    for (int i = 0; i < argc; ++i) {
1913
0
        std::string utf16;
1914
0
        for (size_t j = 0; j < std::wcslen(argv[i]); ++j) {
1915
0
            unsigned short codepoint = static_cast<unsigned short>(argv[i][j]);
1916
0
            utf16.append(1, static_cast<char>(QIntC::to_uchar(codepoint >> 8)));
1917
0
            utf16.append(1, static_cast<char>(QIntC::to_uchar(codepoint & 0xff)));
1918
0
        }
1919
0
        utf8_argv.emplace_back(QUtil::utf16_to_utf8(utf16));
1920
0
    }
1921
0
    std::vector<char*> new_argv;
1922
0
    new_argv.reserve(utf8_argv.size() + 1U);
1923
0
    for (auto const& arg: utf8_argv) {
1924
0
        new_argv.emplace_back(const_cast<char*>(arg.data()));
1925
0
    }
1926
0
    argc = QIntC::to_int(utf8_argv.size());
1927
0
    new_argv.emplace_back(nullptr);
1928
0
    return realmain(argc, new_argv.data());
1929
0
}
1930
1931
int
1932
QUtil::call_main_from_wmain(int argc, wchar_t* argv[], std::function<int(int, char*[])> realmain)
1933
0
{
1934
0
    return ::call_main_from_wmain(true, argc, argv, realmain);
1935
0
}
1936
1937
int
1938
QUtil::call_main_from_wmain(
1939
    int argc, wchar_t const* const argv[], std::function<int(int, char const* const[])> realmain)
1940
0
{
1941
0
    return ::call_main_from_wmain(true, argc, argv, [realmain](int new_argc, char* new_argv[]) {
1942
0
        return realmain(new_argc, new_argv);
1943
0
    });
1944
0
}
1945
1946
#endif // QPDF_NO_WCHAR_T
1947
1948
size_t
1949
QUtil::get_max_memory_usage()
1950
0
{
1951
0
#if defined(HAVE_MALLOC_INFO) && defined(HAVE_OPEN_MEMSTREAM)
1952
0
    static std::regex tag_re("<(/?\\w+)([^>]*?)>");
1953
0
    static std::regex attr_re("(\\w+)=\"(.*?)\"");
1954
1955
0
    char* buf;
1956
0
    size_t size;
1957
0
    FILE* f = open_memstream(&buf, &size);
1958
0
    if (f == nullptr) {
1959
0
        return 0;
1960
0
    }
1961
0
    malloc_info(0, f);
1962
0
    fclose(f);
1963
0
    if (QUtil::get_env("QPDF_DEBUG_MEM_USAGE")) {
1964
0
        fprintf(stderr, "%s", buf);
1965
0
    }
1966
1967
    // Warning: this code uses regular expression to extract data from an XML string. This is
1968
    // generally a bad idea, but we're going to do it anyway because QUtil.hh warns against using
1969
    // this function for other than development/testing, and if this function fails to generate
1970
    // reasonable output during performance testing, it will be noticed.
1971
1972
    // This is my best guess at how to interpret malloc_info. Anyway it seems to provide useful
1973
    // information for detecting code changes that drastically change memory usage.
1974
0
    size_t result = 0;
1975
0
    try {
1976
0
        std::cregex_iterator m_begin(buf, buf + size, tag_re);
1977
0
        std::cregex_iterator cr_end;
1978
0
        std::sregex_iterator sr_end;
1979
1980
0
        int in_heap = 0;
1981
0
        for (auto m = m_begin; m != cr_end; ++m) {
1982
0
            std::string tag(m->str(1));
1983
0
            if (tag == "heap") {
1984
0
                ++in_heap;
1985
0
            } else if (tag == "/heap") {
1986
0
                --in_heap;
1987
0
            } else if (in_heap == 0) {
1988
0
                std::string rest = m->str(2);
1989
0
                std::map<std::string, std::string> attrs;
1990
0
                std::sregex_iterator a_begin(rest.begin(), rest.end(), attr_re);
1991
0
                for (auto m2 = a_begin; m2 != sr_end; ++m2) {
1992
0
                    attrs[m2->str(1)] = m2->str(2);
1993
0
                }
1994
0
                if (tag == "total") {
1995
0
                    if (attrs.contains("size")) {
1996
0
                        result += QIntC::to_size(QUtil::string_to_ull(attrs["size"].c_str()));
1997
0
                    }
1998
0
                } else if (tag == "system" && attrs["type"] == "max") {
1999
0
                    result += QIntC::to_size(QUtil::string_to_ull(attrs["size"].c_str()));
2000
0
                }
2001
0
            }
2002
0
        }
2003
0
    } catch (...) {
2004
        // ignore -- just return 0
2005
0
    }
2006
0
    free(buf);
2007
0
    return result;
2008
#else
2009
    return 0;
2010
#endif
2011
0
}
2012
2013
char
2014
QUtil::hex_decode_char(char digit)
2015
0
{
2016
0
    return util::hex_decode_char(digit);
2017
0
}
2018
2019
std::string
2020
QUtil::hex_encode_char(char c)
2021
0
{
2022
0
    return util::hex_encode_char(c);
2023
0
}
2024
2025
bool
2026
QUtil::is_number(char const* p)
2027
0
{
2028
    // No longer used by qpdf.
2029
2030
    // ^[\+\-]?(\.\d*|\d+(\.\d*)?)$
2031
0
    if (!*p) {
2032
0
        return false;
2033
0
    }
2034
0
    if ((*p == '-') || (*p == '+')) {
2035
0
        ++p;
2036
0
    }
2037
0
    bool found_dot = false;
2038
0
    bool found_digit = false;
2039
0
    for (; *p; ++p) {
2040
0
        if (*p == '.') {
2041
0
            if (found_dot) {
2042
                // only one dot
2043
0
                return false;
2044
0
            }
2045
0
            found_dot = true;
2046
0
        } else if (util::is_digit(*p)) {
2047
0
            found_digit = true;
2048
0
        } else {
2049
0
            return false;
2050
0
        }
2051
0
    }
2052
0
    return found_digit;
2053
0
}
2054
2055
bool
2056
QUtil::is_space(char c)
2057
0
{
2058
0
    return util::is_space(c);
2059
0
}
2060
2061
bool
2062
QUtil::is_digit(char c)
2063
0
{
2064
0
    return util::is_digit(c);
2065
0
}
2066
2067
bool
2068
QUtil::is_hex_digit(char c)
2069
0
{
2070
0
    return util::is_hex_digit(c);
2071
0
}
2072
2073
void
2074
QUtil::handle_result_code(qpdf_result_e result, std::string_view context)
2075
0
{
2076
0
    if (result == qpdf_r_ok) {
2077
0
        return;
2078
0
    }
2079
0
    qpdf::util::assertion(
2080
0
        result == qpdf_r_bad_parameter,
2081
0
        "unexpected result code received from function in "s.append(context));
2082
0
    throw std::logic_error("invalid parameter supplied to function in "s.append(context));
2083
0
}