Coverage Report

Created: 2026-03-12 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/include/qpdf/QUtil.hh
Line
Count
Source
1
// Copyright (c) 2005-2021 Jay Berkenbilt
2
// Copyright (c) 2022-2026 Jay Berkenbilt and Manfred Holger
3
//
4
// This file is part of qpdf.
5
//
6
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
7
// in compliance with the License. You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software distributed under the License
12
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
13
// or implied. See the License for the specific language governing permissions and limitations under
14
// the License.
15
//
16
// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
17
// License. At your option, you may continue to consider qpdf to be licensed under those terms.
18
// Please see the manual for additional information.
19
20
#ifndef QUTIL_HH
21
#define QUTIL_HH
22
23
#include <qpdf/Constants.h>
24
#include <qpdf/DLL.h>
25
#include <qpdf/Types.h>
26
27
#include <cstdio>
28
#include <cstring>
29
#include <ctime>
30
#include <functional>
31
#include <list>
32
#include <memory>
33
#include <stdexcept>
34
#include <string>
35
#include <vector>
36
37
class RandomDataProvider;
38
class Pipeline;
39
40
namespace QUtil
41
{
42
    // This is a collection of useful utility functions that don't really go anywhere else.
43
    QPDF_DLL
44
    std::string int_to_string(long long, int length = 0);
45
    QPDF_DLL
46
    std::string uint_to_string(unsigned long long, int length = 0);
47
    QPDF_DLL
48
    std::string int_to_string_base(long long, int base, int length = 0);
49
    QPDF_DLL
50
    std::string uint_to_string_base(unsigned long long, int base, int length = 0);
51
    QPDF_DLL
52
    std::string double_to_string(double, int decimal_places = 0, bool trim_trailing_zeroes = true);
53
54
    // These string to number methods throw std::runtime_error on underflow/overflow.
55
    QPDF_DLL
56
    long long string_to_ll(char const* str);
57
    QPDF_DLL
58
    int string_to_int(char const* str);
59
    QPDF_DLL
60
    unsigned long long string_to_ull(char const* str);
61
    QPDF_DLL
62
    unsigned int string_to_uint(char const* str);
63
64
    // Returns true if this exactly represents a long long. The determination is made by converting
65
    // the string to a long long, then converting the result back to a string, and then comparing
66
    // that result with the original string.
67
    QPDF_DLL
68
    bool is_long_long(char const* str);
69
70
    // Pipeline's write method wants unsigned char*, but we often have some other type of string.
71
    // These methods do combinations of const_cast and reinterpret_cast to give us an unsigned
72
    // char*. They should only be used when it is known that it is safe. None of the pipelines in
73
    // qpdf modify the data passed to them, so within qpdf, it should always be safe.
74
    QPDF_DLL
75
    unsigned char* unsigned_char_pointer(std::string const& str);
76
    QPDF_DLL
77
    unsigned char* unsigned_char_pointer(char const* str);
78
79
    // Throw QPDFSystemError, which is derived from std::runtime_error, with a string formed by
80
    // appending to "description: " the standard string corresponding to the current value of errno.
81
    // You can retrieve the value of errno by calling getErrno() on the QPDFSystemError. Prior to
82
    // qpdf 8.2.0, this method threw std::runtime_error directly, but since QPDFSystemError is
82
    // derived from std::runtime_error, old code that specifically catches std::runtime_error
84
    // will still work.
85
    QPDF_DLL
86
    void throw_system_error(std::string const& description);
87
88
    // The status argument is assumed to be the return value of a standard library call that sets
89
    // errno when it fails.  If status is -1, convert the current value of errno to a
90
    // std::runtime_error that includes the standard error string. Otherwise, return status.
91
    QPDF_DLL
92
    int os_wrapper(std::string const& description, int status);
93
94
    // If the open fails, throws std::runtime_error. Otherwise, the FILE* is returned. The filename
95
    // should be UTF-8 encoded, even on Windows. It will be converted as needed on Windows.
96
    QPDF_DLL
97
    FILE* safe_fopen(char const* filename, char const* mode);
98
99
    // The FILE* argument is assumed to be the return of fopen. If null, throw std::runtime_error.
100
    // Otherwise, return the FILE* argument.
101
    QPDF_DLL
102
    FILE* fopen_wrapper(std::string const&, FILE*);
103
104
    // This is a little class to help with automatically closing files. You can do something like
    //
    // QUtil::FileCloser fc(QUtil::safe_fopen(filename, "rb"));
    //
    // and then use fc.f to access the file. Be sure to actually declare a variable of type
    // FileCloser. Using it as a temporary won't work because it will close the file as soon as it
    // goes out of scope.
    //
    // A FileCloser is the sole owner of its FILE*. It cannot be copied, because two copies would
    // both call fclose on the same stream. It can be moved, which hands the FILE* to the
    // destination and leaves the source holding nullptr.
    class FileCloser
    {
      public:
        // Take ownership of f. A null f is allowed; the destructor then does nothing. The
        // constructor is intentionally not explicit so that existing callers relying on the
        // implicit conversion from FILE* keep compiling.
        FileCloser(FILE* f) :
            f(f)
        {
        }

        FileCloser(FileCloser const&) = delete;
        FileCloser& operator=(FileCloser const&) = delete;

        // Transfer ownership from other, which is left holding nullptr.
        FileCloser(FileCloser&& other) noexcept :
            f(other.f)
        {
            other.f = nullptr;
        }

        // Close any file currently held, then take ownership of other's file.
        FileCloser&
        operator=(FileCloser&& other) noexcept
        {
            if (this != &other) {
                if (f) {
                    fclose(f);
                }
                f = other.f;
                other.f = nullptr;
            }
            return *this;
        }

        // Close the file if one is still held.
        ~FileCloser()
        {
            if (f) {
                fclose(f);
                f = nullptr;
            }
        }

        // The owned stream, or nullptr if there is none.
        FILE* f;
    };
129
130
    // Attempt to open the file read only and then close again
131
    QPDF_DLL
132
    bool file_can_be_opened(char const* filename);
133
134
    // Wrap around off_t versions of fseek and ftell if available
135
    QPDF_DLL
136
    int seek(FILE* stream, qpdf_offset_t offset, int whence);
137
    QPDF_DLL
138
    qpdf_offset_t tell(FILE* stream);
139
140
    QPDF_DLL
141
    bool same_file(char const* name1, char const* name2);
142
143
    QPDF_DLL
144
    void remove_file(char const* path);
145
146
    // rename_file will overwrite newname if it exists
147
    QPDF_DLL
148
    void rename_file(char const* oldname, char const* newname);
149
150
    // Write the contents of filename as a binary file to the pipeline.
151
    QPDF_DLL
152
    void pipe_file(char const* filename, Pipeline* p);
153
154
    // Return a function that will send the contents of the given file through the given pipeline as
155
    // binary data.
156
    QPDF_DLL
157
    std::function<void(Pipeline*)> file_provider(std::string const& filename);
158
159
    // Return the last path element. On Windows, either / or \ are path separators. Otherwise, only
160
    // / is a path separator. Strip any trailing path separators. Then, if any path separators
161
    // remain, return everything after the last path separator. Otherwise, return the whole string.
162
    // As a special case, if a string consists entirely of path separators, the first character is
163
    // returned.
164
    QPDF_DLL
165
    std::string path_basename(std::string const& filename);
166
167
    // Returns a dynamically allocated copy of a string that the caller has to delete with delete[].
168
    QPDF_DLL
169
    char* copy_string(std::string const&);
170
171
    // Returns a shared_ptr<char> with the correct deleter.
172
    QPDF_DLL
173
    std::shared_ptr<char> make_shared_cstr(std::string const&);
174
175
    // Copy string as a unique_ptr to an array.
176
    QPDF_DLL
177
    std::unique_ptr<char[]> make_unique_cstr(std::string const&);
178
179
    // Create a shared pointer to an array. From c++20, std::make_shared<T[]>(n) does this.
    //
    // The n elements are allocated with new T[n] and released with std::default_delete<T[]>, i.e.
    // delete[]. Because new T[n] default-initializes, elements of trivial types such as char start
    // out with indeterminate values. The result is a std::shared_ptr<T>, not a
    // std::shared_ptr<T[]>, so elements are reached through get() rather than operator[].
180
    template <typename T>
181
    std::shared_ptr<T>
182
    make_shared_array(size_t n)
183
7.60k
    {
184
7.60k
        return std::shared_ptr<T>(new T[n], std::default_delete<T[]>());
185
7.60k
    }
std::__1::shared_ptr<unsigned char> QUtil::make_shared_array<unsigned char>(unsigned long)
Line
Count
Source
183
7.60k
    {
184
7.60k
        return std::shared_ptr<T>(new T[n], std::default_delete<T[]>());
185
7.60k
    }
Unexecuted instantiation: std::__1::shared_ptr<char> QUtil::make_shared_array<char>(unsigned long)
186
187
    // Returns lower-case hex-encoded version of the string, treating each character in the input
188
    // string as unsigned.  The output string will be twice as long as the input string.
189
    QPDF_DLL
190
    std::string hex_encode(std::string const&);
191
192
    // Returns lower-case hex-encoded version of the char including a leading "#".
193
    QPDF_DLL
194
    std::string hex_encode_char(char);
195
196
    // Returns a string that is the result of decoding the input string. The input string may
197
    // consist of mixed case hexadecimal digits. Any characters that are not hexadecimal digits will
198
    // be silently ignored. If there are an odd number of hexadecimal digits, a trailing 0 will be
199
    // assumed.
200
    QPDF_DLL
201
    std::string hex_decode(std::string const&);
202
203
    // Decode a single hex digit into a char in the range 0 <= char < 16. Return a char >= 16 if
204
    // digit is not a valid hex digit.
205
    QPDF_DLL
206
    char hex_decode_char(char digit);
207
208
    // Set stdin, stdout to binary mode
209
    QPDF_DLL
210
    void binary_stdout();
211
    QPDF_DLL
212
    void binary_stdin();
213
    // Set the given stream (typically stdout) to line buffered
214
    QPDF_DLL
215
    void setLineBuf(FILE*);
216
217
    // May modify argv0
218
    QPDF_DLL
219
    char* getWhoami(char* argv0);
220
221
    // Get the value of an environment variable in a portable fashion. Returns true iff the variable
222
    // is defined.  If `value' is non-null, initializes it with the value of the variable.
223
    QPDF_DLL
224
    bool get_env(std::string const& var, std::string* value = nullptr);
225
226
    QPDF_DLL
227
    time_t get_current_time();
228
229
    // Portable structure representing a point in time with second granularity and time zone offset.
    //
    // Every field has a default member initializer of 0, so a default-constructed QPDFTime has the
    // same well-defined contents as a value-initialized one rather than indeterminate values. The
    // all-zero state is only a deterministic placeholder; the ranges noted on the fields describe
    // meaningful values.
    struct QPDFTime
    {
        QPDFTime() = default;
        QPDFTime(QPDFTime const&) = default;
        QPDFTime& operator=(QPDFTime const&) = default;
        // Initialize each field from the argument of the same name. No validation is performed.
        QPDFTime(int year, int month, int day, int hour, int minute, int second, int tz_delta) :
            year(year),
            month(month),
            day(day),
            hour(hour),
            minute(minute),
            second(second),
            tz_delta(tz_delta)
        {
        }
        int year{0};  // actual year, no 1900 stuff
        int month{0}; // 1--12
        int day{0};   // 1--31
        int hour{0};
        int minute{0};
        int second{0};
        int tz_delta{0}; // minutes before UTC
    };
253
254
    QPDF_DLL
255
    QPDFTime get_current_qpdf_time();
256
257
    // Convert a QPDFTime structure to a PDF timestamp string, which is "D:yyyymmddhhmmss<z>" where
258
    // <z> is either "Z" for UTC or "-hh'mm'" or "+hh'mm'" for timezone offset. <z> may also be
259
    // omitted.
260
    // Examples: "D:20210207161528-05'00'", "D:20210207211528Z", "D:20210207211528".
261
    // See get_current_qpdf_time and the QPDFTime structure above.
262
    QPDF_DLL
263
    std::string qpdf_time_to_pdf_time(QPDFTime const&);
264
265
    // Convert QPDFTime to a second-granularity ISO-8601 timestamp.
266
    QPDF_DLL
267
    std::string qpdf_time_to_iso8601(QPDFTime const&);
268
269
    // Convert a PDF timestamp string to a QPDFTime. If syntactically valid, return true and fill in
270
    // qtm. If not valid, return false, and do not modify qtm. If qtm is null, just check the
271
    // validity of the string.
272
    QPDF_DLL
273
    bool pdf_time_to_qpdf_time(std::string const&, QPDFTime* qtm = nullptr);
274
275
    // Convert PDF timestamp to a second-granularity ISO-8601 timestamp. If syntactically valid,
276
    // return true and initialize iso8601. Otherwise, return false.
    // NOTE(review): unlike the neighboring declarations, this one is not marked QPDF_DLL, so it
    // may not be exported from a shared-library build -- confirm whether that is intentional.
277
    bool pdf_time_to_iso8601(std::string const& pdf_time, std::string& iso8601);
278
279
    // Return a string containing the byte representation of the UTF-8 encoding for the unicode
280
    // value passed in.
281
    QPDF_DLL
282
    std::string toUTF8(unsigned long uval);
283
284
    // Return a string containing the byte representation of the UTF-16 big-endian encoding for the
285
    // unicode value passed in. Unrepresentable code points are converted to U+FFFD.
286
    QPDF_DLL
287
    std::string toUTF16(unsigned long uval);
288
289
    // If utf8_val.at(pos) points to the beginning of a valid UTF-8-encoded character, return the
290
    // codepoint of the character and set error to false. Otherwise, return 0xfffd and set error to
291
    // true. In all cases, pos is advanced to the next position that may begin a valid character.
292
    // When the string has been consumed, pos will be set to the string length. It is an error to
293
    // pass a value of pos that is greater than or equal to the length of the string.
294
    QPDF_DLL
295
    unsigned long get_next_utf8_codepoint(std::string const& utf8_val, size_t& pos, bool& error);
296
297
    // Test whether this is a UTF-16 string. This is indicated by first two bytes being 0xFE 0xFF
298
    // (big-endian) or 0xFF 0xFE (little-endian), each of which is the encoding of U+FEFF, the
299
    // Unicode marker. Starting in qpdf 10.6.2, this detects little-endian as well as big-endian.
300
    // Even though the PDF spec doesn't allow little-endian, most readers seem to accept it.
301
    QPDF_DLL
302
    bool is_utf16(std::string const&);
303
304
    // Test whether this is an explicit UTF-8 string as allowed by the PDF 2.0 spec. This is
305
    // indicated by first three bytes being 0xEF 0xBB 0xBF, which is the UTF-8 encoding of U+FEFF.
306
    QPDF_DLL
307
    bool is_explicit_utf8(std::string const&);
308
309
    // Convert a UTF-8 encoded string to UTF-16 big-endian. Unrepresentable code points are
310
    // converted to U+FFFD.
311
    QPDF_DLL
312
    std::string utf8_to_utf16(std::string const& utf8);
313
314
    // Convert a UTF-8 encoded string to the specified single-byte encoding system by replacing all
315
    // unsupported characters with the given unknown_char.
316
    QPDF_DLL
317
    std::string utf8_to_ascii(std::string const& utf8, char unknown_char = '?');
318
    QPDF_DLL
319
    std::string utf8_to_win_ansi(std::string const& utf8, char unknown_char = '?');
320
    QPDF_DLL
321
    std::string utf8_to_mac_roman(std::string const& utf8, char unknown_char = '?');
322
    QPDF_DLL
323
    std::string utf8_to_pdf_doc(std::string const& utf8, char unknown_char = '?');
324
325
    // These versions return true if the conversion was successful and false if any unrepresentable
326
    // characters were found and had to be substituted with the unknown character.
327
    QPDF_DLL
328
    bool utf8_to_ascii(std::string const& utf8, std::string& ascii, char unknown_char = '?');
329
    QPDF_DLL
330
    bool utf8_to_win_ansi(std::string const& utf8, std::string& win, char unknown_char = '?');
331
    QPDF_DLL
332
    bool utf8_to_mac_roman(std::string const& utf8, std::string& mac, char unknown_char = '?');
333
    QPDF_DLL
334
    bool utf8_to_pdf_doc(std::string const& utf8, std::string& pdfdoc, char unknown_char = '?');
335
336
    // Convert a UTF-16 encoded string to UTF-8. Unrepresentable code
337
    // points are converted to U+FFFD.
338
    QPDF_DLL
339
    std::string utf16_to_utf8(std::string const& utf16);
340
341
    // Convert from the specified single-byte encoding system to UTF-8. There is no ascii_to_utf8
342
    // because all ASCII strings are already valid UTF-8.
343
    QPDF_DLL
344
    std::string win_ansi_to_utf8(std::string const& win);
345
    QPDF_DLL
346
    std::string mac_roman_to_utf8(std::string const& mac);
347
    QPDF_DLL
348
    std::string pdf_doc_to_utf8(std::string const& pdfdoc);
349
350
    // Analyze a string for encoding. We can't tell the difference between any single-byte
351
    // encodings, and we can't tell for sure whether a string that happens to be valid UTF-8 isn't a
352
    // different encoding, but we can at least tell a few things to help us guess. If there are no
353
    // characters with the high bit set, has_8bit_chars is false, and the other values are also
354
    // false, even though ASCII strings are valid UTF-8. is_valid_utf8 means that the string is
355
    // non-trivially valid UTF-8. Although the PDF spec requires UTF-16 to be UTF-16BE, qpdf (and
356
    // just about everything else) accepts UTF-16LE (as of 10.6.2).
357
    QPDF_DLL
358
    void analyze_encoding(
359
        std::string const& str, bool& has_8bit_chars, bool& is_valid_utf8, bool& is_utf16);
360
361
    // Try to compensate for previously incorrectly encoded strings. We want to compensate for the
362
    // following errors:
363
    //
364
    // * The string was supposed to be UTF-8 but was one of the single-byte encodings
365
    // * The string was supposed to be PDF Doc but was either UTF-8 or one of the other single-byte
366
    //   encodings
367
    //
368
    // The returned vector always contains the original string first, and then it contains what the
369
    // correct string would be in the event that the original string was the result of any of the
370
    // above errors.
371
    //
372
    // This method is useful for attempting to recover a password that may have been previously
373
    // incorrectly encoded. For example, the password was supposed to be UTF-8 but the previous
374
    // application used a password encoded in WinAnsi, or if the previous password was supposed to
375
    // be PDFDoc but was actually given as UTF-8 or WinAnsi, this method would find the correct
376
    // password.
377
    QPDF_DLL
378
    std::vector<std::string> possible_repaired_encodings(std::string);
379
380
    // Return a cryptographically secure random number.
381
    QPDF_DLL
382
    long random();
383
384
    // Initialize a buffer with cryptographically secure random bytes.
385
    QPDF_DLL
386
    void initializeWithRandomBytes(unsigned char* data, size_t len);
387
388
    // Supply a random data provider. Starting in qpdf 10.0.0, qpdf uses the crypto provider as its
389
    // source of random numbers. If you are using the native crypto provider, then qpdf will either
390
    // use the operating system's secure random number source or, only if enabled at build time, an
391
    // insecure random source from stdlib. The caller is responsible for managing the memory for the
392
    // RandomDataProvider. This method modifies a static variable. If you are providing your own
393
    // random data provider, you should call this at the beginning of your program before creating
394
    // any QPDF objects. Passing a null to this method will reset the library back to its default
395
    // random data provider.
396
    QPDF_DLL
397
    void setRandomDataProvider(RandomDataProvider*);
398
399
    // This returns the random data provider that would be used the next time qpdf needs random
400
    // data.  It will never return null. If no random data provider has been provided and the
401
    // library was not compiled with any random data provider available, an exception will be
402
    // thrown.
403
    QPDF_DLL
404
    RandomDataProvider* getRandomDataProvider();
405
406
    // Filename is UTF-8 encoded, even on Windows, as described in the comments for safe_fopen.
407
    QPDF_DLL
408
    std::list<std::string> read_lines_from_file(char const* filename, bool preserve_eol = false);
409
    QPDF_DLL
410
    std::list<std::string> read_lines_from_file(std::istream&, bool preserve_eol = false);
411
    QPDF_DLL
412
    std::list<std::string> read_lines_from_file(FILE*, bool preserve_eol = false);
413
    QPDF_DLL
414
    void read_lines_from_file(
415
        std::function<bool(char&)> next_char,
416
        std::list<std::string>& lines,
417
        bool preserve_eol = false);
418
419
    QPDF_DLL
420
    void read_file_into_memory(char const* filename, std::shared_ptr<char>& file_buf, size_t& size);
421
422
    QPDF_DLL
423
    std::string read_file_into_string(char const* filename);
424
    QPDF_DLL
425
    std::string read_file_into_string(FILE* f, std::string_view filename = "");
426
427
    // This used to be called strcasecmp, but that is a macro on some platforms, so we have to give
428
    // it a name that is not likely to be a macro anywhere.
429
    QPDF_DLL
430
    int str_compare_nocase(char const*, char const*);
431
432
    // These routines help the tokenizer recognize certain character classes without using ctype,
433
    // which we avoid because of locale considerations.
434
    QPDF_DLL
435
    bool is_hex_digit(char);
436
437
    QPDF_DLL
438
    bool is_space(char);
439
440
    QPDF_DLL
441
    bool is_digit(char);
442
443
    QPDF_DLL
444
    bool is_number(char const*);
445
446
    /// @brief  Handles the result code from qpdf functions.
447
    ///
448
    ///         **For qpdf internal use only - not part of the public API**
449
    /// @par
450
    ///         Depending on the result code, either continues execution or throws an
451
    ///         exception in case of an invalid parameter.
452
    ///
453
    /// @param  result The result code of type qpdf_result_e, indicating success or failure status.
454
    /// @param context A string describing the context where this function is invoked, used for
455
    ///                error reporting if an exception is thrown.
456
    ///
457
    /// @throws std::logic_error If the result code is `qpdf_bad_parameter`, indicating an invalid
458
    ///                          parameter was supplied to a function. The exception message will
459
    ///                          include the provided context for easier debugging.
460
    ///
461
    /// @since 12.3
462
    QPDF_DLL
463
    void handle_result_code(qpdf_result_e result, std::string_view context);
464
465
    // This method parses the numeric range syntax used by the qpdf command-line tool. May throw
466
    // std::runtime_error. A numeric range is a comma-separated list of groups. A group may be a
467
    // number specification or a range of number specifications separated by a dash. A number
468
    // specification may be one of the following (where <n> is a number):
469
    // * <n> -- the numeric value of n
470
    // * z -- the value of the `max` parameter
471
    // * r<n> -- represents max + 1 - <n> (<n> from the end)
472
    //
473
    // If the group is two number specifications separated by a dash, it represents the range of
474
    // numbers from the first to the second, inclusive. If the first is greater than the second, the
475
    // numbers are descending.
476
    //
477
    // From qpdf 11.7.1: if a group starts with `x`, its members are excluded from the previous
478
    // group that didn't start with `x`.
479
    //
480
    // Example: with max of 15, the range "4-10,x7-9,12-8,xr5" is 4, 5, 6, 10, 12, 10, 9, 8. This is
481
    // 4 through 10 inclusive without 7 through 9 inclusive followed by 12 to 8 inclusive
482
    // (descending) without 11 (the fifth value counting backwards from 15). For more information
483
    // and additional examples, see the "Page Ranges" section in the manual.
484
    QPDF_DLL
485
    std::vector<int> parse_numrange(char const* range, int max);
486
487
#ifndef QPDF_NO_WCHAR_T
488
    // If you are building qpdf on a stripped down system that doesn't have wchar_t, such as may be
489
    // the case in some embedded environments, you may define QPDF_NO_WCHAR_T in your build. This
490
    // symbol is never defined automatically. Search for wchar_t in qpdf's top-level README.md file
491
    // for details.
492
493
    // Take an argv array consisting of wchar_t, as when wmain is invoked, convert all UTF-16
494
    // encoded strings to UTF-8, and call another main.
495
    QPDF_DLL
496
    int call_main_from_wmain(int argc, wchar_t* argv[], std::function<int(int, char*[])> realmain);
497
    QPDF_DLL
498
    int call_main_from_wmain(
499
        int argc,
500
        wchar_t const* const argv[],
501
        std::function<int(int, char const* const[])> realmain);
502
#endif // QPDF_NO_WCHAR_T
503
504
    // Try to return the maximum amount of memory allocated by the current process and its threads.
505
    // Return 0 if unable to determine. This is Linux-specific and not implemented to be completely
506
    // reliable. It is used during development for performance testing to detect changes that may
507
    // significantly change memory usage. It is not recommended for use for other purposes.
508
    QPDF_DLL
509
    size_t get_max_memory_usage();
510
}; // namespace QUtil
511
512
#endif // QUTIL_HH