Coverage Report

Created: 2026-03-07 06:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDF_String.cc
Line
Count
Source
1
#include <qpdf/QPDFObject_private.hh>
2
3
#include <qpdf/QPDFObjectHandle_private.hh>
4
#include <qpdf/QUtil.hh>
5
#include <qpdf/Util.hh>
6
7
// DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of
8
// including it in case it may accidentally be used.
9
10
// Return true when the byte value of ch lies in 32..126 or is at least 160. The comparison is done
// on the unsigned byte value so the result does not depend on whether plain char is signed.
static bool
is_iso_latin1_printable(char ch)
{
    auto const byte = static_cast<unsigned char>(ch);
    if (byte >= 160) {
        return true;
    }
    return byte >= 32 && byte <= 126;
}
15
16
void
17
QPDF_String::writeJSON(int json_version, JSON::Writer& p)
18
0
{
19
0
    if (json_version == 1) {
20
0
        if (util::is_utf16(val)) {
21
0
            p << "\"" << JSON::Writer::encode_string(QUtil::utf16_to_utf8(val)) << "\"";
22
0
            return;
23
0
        }
24
0
        if (util::is_explicit_utf8(val)) {
25
            // PDF 2.0 allows UTF-8 strings when explicitly prefixed with the three-byte
26
            // representation of U+FEFF.
27
0
            p << "\"" << JSON::Writer::encode_string(val.substr(3)) << "\"";
28
0
            return;
29
0
        }
30
0
        p << "\"" << JSON::Writer::encode_string(QUtil::pdf_doc_to_utf8(val)) << "\"";
31
0
        return;
32
0
    }
33
    // See if we can unambiguously represent as Unicode.
34
0
    if (util::is_utf16(val)) {
35
0
        p << "\"u:" << JSON::Writer::encode_string(QUtil::utf16_to_utf8(val)) << "\"";
36
0
        return;
37
0
    }
38
    // See if we can unambiguously represent as Unicode.
39
0
    if (util::is_explicit_utf8(val)) {
40
0
        p << "\"u:" << JSON::Writer::encode_string(val.substr(3)) << "\"";
41
0
        return;
42
0
    }
43
0
    if (!useHexString()) {
44
0
        auto candidate = QUtil::pdf_doc_to_utf8(val);
45
0
        std::string test;
46
0
        if (QUtil::utf8_to_pdf_doc(candidate, test, '?') && test == val) {
47
            // This is a PDF-doc string that can be losslessly encoded as Unicode.
48
0
            p << "\"u:" << JSON::Writer::encode_string(candidate) << "\"";
49
0
            return;
50
0
        }
51
0
    }
52
0
    p << "\"b:" << QUtil::hex_encode(val) << "\"";
53
0
}
54
55
bool
56
QPDF_String::useHexString() const
57
67.6k
{
58
    // Heuristic: use the hexadecimal representation of a string if there are any non-printable (in
59
    // PDF Doc encoding) characters or if too large of a proportion of the string consists of
60
    // non-ASCII characters.
61
67.6k
    unsigned int non_ascii = 0;
62
18.3M
    for (auto const ch: val) {
63
18.3M
        if (ch > 126) {
64
2.87M
            ++non_ascii;
65
15.5M
        } else if (ch >= 32) {
66
13.8M
            continue;
67
13.8M
        } else if (ch < 0 || ch >= 24) {
68
318k
            ++non_ascii;
69
1.36M
        } else if (!(ch == '\n' || ch == '\r' || ch == '\t' || ch == '\b' || ch == '\f')) {
70
9.58k
            return true;
71
9.58k
        }
72
18.3M
    }
73
58.0k
    return 5 * non_ascii > val.length();
74
67.6k
}
75
76
std::string
77
QPDF_String::unparse(bool force_binary)
78
91.9k
{
79
91.9k
    bool use_hexstring = force_binary || useHexString();
80
91.9k
    std::string result;
81
91.9k
    if (use_hexstring) {
82
40.8k
        static auto constexpr hexchars = "0123456789abcdef";
83
40.8k
        result.reserve(2 * val.length() + 2);
84
40.8k
        result += '<';
85
27.5M
        for (const char c: val) {
86
27.5M
            result += hexchars[static_cast<unsigned char>(c) >> 4];
87
27.5M
            result += hexchars[c & 0x0f];
88
27.5M
        }
89
40.8k
        result += '>';
90
51.1k
    } else {
91
51.1k
        result += "(";
92
14.9M
        for (unsigned int i = 0; i < val.length(); ++i) {
93
14.8M
            char ch = val.at(i);
94
14.8M
            switch (ch) {
95
1.04M
            case '\n':
96
1.04M
                result += "\\n";
97
1.04M
                break;
98
99
10.9k
            case '\r':
100
10.9k
                result += "\\r";
101
10.9k
                break;
102
103
124k
            case '\t':
104
124k
                result += "\\t";
105
124k
                break;
106
107
5.56k
            case '\b':
108
5.56k
                result += "\\b";
109
5.56k
                break;
110
111
116k
            case '\f':
112
116k
                result += "\\f";
113
116k
                break;
114
115
25.6k
            case '(':
116
25.6k
                result += "\\(";
117
25.6k
                break;
118
119
25.9k
            case ')':
120
25.9k
                result += "\\)";
121
25.9k
                break;
122
123
13.3k
            case '\\':
124
13.3k
                result += "\\\\";
125
13.3k
                break;
126
127
13.5M
            default:
128
13.5M
                if (is_iso_latin1_printable(ch)) {
129
13.4M
                    result += val.at(i);
130
13.4M
                } else {
131
66.5k
                    result += "\\" +
132
66.5k
                        QUtil::int_to_string_base(
133
66.5k
                                  static_cast<int>(static_cast<unsigned char>(ch)), 8, 3);
134
66.5k
                }
135
13.5M
                break;
136
14.8M
            }
137
14.8M
        }
138
51.1k
        result += ")";
139
51.1k
    }
140
141
91.9k
    return result;
142
91.9k
}