Coverage Report

Created: 2025-07-11 07:02

/src/qpdf/libqpdf/QPDF_String.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/QPDFObject_private.hh>
2
3
#include <qpdf/QPDFObjectHandle_private.hh>
4
#include <qpdf/QUtil.hh>
5
6
// DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of
7
// including it in case it may accidentally be used.
8
9
// Returns true if ch lies in 32..126 inclusive or, viewed as an unsigned char, is 160 or above.
// Per the function name, these are the byte ranges this file treats as printable ISO Latin-1.
static bool
10
is_iso_latin1_printable(char ch)
11
6.38M
{
12
6.38M
    // The cast to unsigned char keeps the ">= 160" test meaningful where plain char is signed and
    // such bytes would otherwise compare as negative values.
    return (((ch >= 32) && (ch <= 126)) || (static_cast<unsigned char>(ch) >= 160));
13
6.38M
}
14
15
// Creates a QPDF_String object from the UTF-8 text in utf8_val.
//
// The text is first passed to QUtil::utf8_to_pdf_doc (with '?' as its third argument); only if that
// call returns false is the value replaced by QUtil::utf8_to_utf16(utf8_val). Despite the function
// name, the stored bytes are therefore not necessarily UTF-16.
// NOTE(review): presumably utf8_to_pdf_doc returns false when the text cannot be represented in
// PDF Doc encoding -- confirm against QUtil.
std::shared_ptr<QPDFObject>
16
QPDF_String::create_utf16(std::string const& utf8_val)
17
0
{
18
0
    std::string result;
19
0
    if (!QUtil::utf8_to_pdf_doc(utf8_val, result, '?')) {
20
0
        // Fall back to the UTF-16 conversion, discarding whatever the failed call left in result.
        result = QUtil::utf8_to_utf16(utf8_val);
21
0
    }
22
0
    return QPDFObject::create<QPDF_String>(result);
23
0
}
24
25
// Writes this string to p as a quoted JSON string.
//
// json_version == 1: the UTF-8 value from getUTF8Val(), JSON-encoded, with no prefix.
// Any other json_version: one of two prefixed forms --
//   "u:<text>" when val is UTF-16 or explicit UTF-8, or when useHexString() is false and the UTF-8
//              value converts back through QUtil::utf8_to_pdf_doc to exactly val;
//   "b:<hex>"  otherwise, with the hex produced by QUtil::hex_encode(val).
void
26
QPDF_String::writeJSON(int json_version, JSON::Writer& p)
27
0
{
28
0
    // Computed up front because every branch except the final "b:" fallback uses it.
    auto candidate = getUTF8Val();
29
0
    if (json_version == 1) {
30
0
        p << "\"" << JSON::Writer::encode_string(candidate) << "\"";
31
0
    } else {
32
        // See if we can unambiguously represent as Unicode.
33
0
        if (QUtil::is_utf16(val) || QUtil::is_explicit_utf8(val)) {
34
0
            p << "\"u:" << JSON::Writer::encode_string(candidate) << "\"";
35
0
            return;
36
0
        } else if (!useHexString()) {
37
0
            std::string test;
38
0
            // Round-trip check: convert the UTF-8 candidate back and require the original bytes.
            if (QUtil::utf8_to_pdf_doc(candidate, test, '?') && (test == val)) {
39
                // This is a PDF-doc string that can be losslessly encoded as Unicode.
40
0
                p << "\"u:" << JSON::Writer::encode_string(candidate) << "\"";
41
0
                return;
42
0
            }
43
0
        }
44
0
        // Reached when neither Unicode form applied: emit the raw bytes as hex.
        p << "\"b:" << QUtil::hex_encode(val) << "\"";
45
0
    }
46
0
}
47
48
// Decides whether val should be written as a hexadecimal string.
//
// Returns true immediately on the first byte in 0..23 that is not one of \n, \r, \t, \b, \f.
// Otherwise returns true when the counted bytes (see the loop) make up more than one fifth of
// val's length, and false if not. An empty val yields false (0 > 0 is false).
bool
49
QPDF_String::useHexString() const
50
12.5k
{
51
    // Heuristic: use the hexadecimal representation of a string if there are any non-printable (in
52
    // PDF Doc encoding) characters or if too large of a proportion of the string consists of
53
    // non-ASCII characters.
54
12.5k
    unsigned int non_ascii = 0;
55
7.94M
    for (auto const ch: val) {
56
7.94M
        // Values above 126 are counted. Where plain char is signed, bytes 128..255 are negative and
        // are counted by the "ch < 0" test below instead.
        if (ch > 126) {
57
2.54k
            ++non_ascii;
58
7.94M
        } else if (ch >= 32) {
59
7.57M
            // 32..126: neither counted nor a reason to force hex.
            continue;
60
7.57M
        } else if (ch < 0 || ch >= 24) {
61
340k
            // Negative values and 24..31 are counted toward the proportion test.
            ++non_ascii;
62
340k
        } else if (!(ch == '\n' || ch == '\r' || ch == '\t' || ch == '\b' || ch == '\f')) {
63
1.81k
            // Remaining range is 0..23: anything other than the five listed escapes forces hex.
            return true;
64
1.81k
        }
65
7.94M
    }
66
10.7k
    // True when more than 20% of the bytes were counted.
    return 5 * non_ascii > val.length();
67
12.5k
}
68
69
// Returns val rendered as text in one of two forms:
//   - "<...>" with two lowercase hex digits per byte, when force_binary is true or useHexString()
//     returns true;
//   - "(...)" with backslash escapes otherwise.
std::string
70
QPDF_String::unparse(bool force_binary)
71
152k
{
72
152k
    bool use_hexstring = force_binary || useHexString();
73
152k
    std::string result;
74
152k
    if (use_hexstring) {
75
142k
        static auto constexpr hexchars = "0123456789abcdef";
76
142k
        // Two hex digits per byte plus the enclosing '<' and '>'.
        result.reserve(2 * val.length() + 2);
77
142k
        result += '<';
78
17.7M
        for (const char c: val) {
79
17.7M
            // Cast before shifting so a negative (signed) char cannot yield an out-of-range index.
            result += hexchars[static_cast<unsigned char>(c) >> 4];
80
17.7M
            // Masking with 0x0f always gives 0..15, so no cast is needed for the low nibble.
            result += hexchars[c & 0x0f];
81
17.7M
        }
82
142k
        result += '>';
83
142k
    } else {
84
10.2k
        result += "(";
85
6.42M
        for (unsigned int i = 0; i < val.length(); ++i) {
86
6.41M
            char ch = val.at(i);
87
6.41M
            // The named cases emit two-character backslash escapes; everything else goes through
            // the default case.
            switch (ch) {
88
15.5k
            case '\n':
89
15.5k
                result += "\\n";
90
15.5k
                break;
91
92
214
            case '\r':
93
214
                result += "\\r";
94
214
                break;
95
96
4.92k
            case '\t':
97
4.92k
                result += "\\t";
98
4.92k
                break;
99
100
1.24k
            case '\b':
101
1.24k
                result += "\\b";
102
1.24k
                break;
103
104
6.54k
            case '\f':
105
6.54k
                result += "\\f";
106
6.54k
                break;
107
108
892
            case '(':
109
892
                result += "\\(";
110
892
                break;
111
112
930
            case ')':
113
930
                result += "\\)";
114
930
                break;
115
116
2.03k
            case '\\':
117
2.03k
                result += "\\\\";
118
2.03k
                break;
119
120
6.38M
            default:
121
6.38M
                // Bytes accepted by is_iso_latin1_printable are copied through unchanged.
                if (is_iso_latin1_printable(ch)) {
122
6.37M
                    result += val.at(i);
123
6.37M
                } else {
124
4.03k
                    // Anything else becomes a backslash followed by the byte's unsigned value as
                    // formatted by int_to_string_base(value, 8, 3).
                    // NOTE(review): presumably base 8 padded to three digits -- confirm in QUtil.
                    result += "\\" +
125
4.03k
                        QUtil::int_to_string_base(
126
4.03k
                                  static_cast<int>(static_cast<unsigned char>(ch)), 8, 3);
127
4.03k
                }
128
6.38M
                break;
129
6.41M
            }
130
6.41M
        }
131
10.2k
        result += ")";
132
10.2k
    }
133
134
152k
    return result;
135
152k
}
136
137
// Returns val converted to UTF-8, choosing the conversion from how val is encoded:
//   - QUtil::is_utf16(val): the result of QUtil::utf16_to_utf8(val);
//   - QUtil::is_explicit_utf8(val): val with its first three bytes removed;
//   - otherwise: the result of QUtil::pdf_doc_to_utf8(val).
std::string
138
QPDF_String::getUTF8Val() const
139
0
{
140
0
    if (QUtil::is_utf16(val)) {
141
0
        return QUtil::utf16_to_utf8(val);
142
0
    } else if (QUtil::is_explicit_utf8(val)) {
143
        // PDF 2.0 allows UTF-8 strings when explicitly prefixed with the three-byte representation
144
        // of U+FEFF.
145
0
        // Drop that three-byte prefix; the remainder is returned as-is.
        return val.substr(3);
146
0
    } else {
147
0
        return QUtil::pdf_doc_to_utf8(val);
148
0
    }
149
0
}