Coverage Report

Created: 2025-07-11 07:02

/src/qpdf/libqpdf/InputSource.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/InputSource_private.hh>
2
3
#include <qpdf/QIntC.hh>
4
#include <qpdf/QTC.hh>
5
#include <cstring>
6
#include <stdexcept>
7
8
using namespace std::literals;
9
10
void
11
InputSource::setLastOffset(qpdf_offset_t offset)
12
15.1M
{
13
15.1M
    this->last_offset = offset;
14
15.1M
}
15
16
// Report the value currently held in the last_offset member.
qpdf_offset_t
InputSource::getLastOffset() const
{
    return last_offset;
}
21
22
size_t
23
InputSource::read_line(std::string& str, size_t count, qpdf_offset_t at)
24
16.5k
{
25
    // Return at most max_line_length characters from the next line. Lines are terminated by one or
26
    // more \r or \n characters. Consume the trailing newline characters but don't return them.
27
    // After this is called, the file will be positioned after a line terminator or at the end of
28
    // the file, and last_offset will point to position the file had when this method was called.
29
30
16.5k
    read(str, count, at);
31
16.5k
    auto eol = str.find_first_of("\n\r"sv);
32
16.5k
    if (eol != std::string::npos) {
33
14.2k
        auto next_line = str.find_first_not_of("\n\r"sv, eol);
34
14.2k
        str.resize(eol);
35
14.2k
        if (eol != std::string::npos) {
36
14.2k
            seek(last_offset + static_cast<qpdf_offset_t>(next_line), SEEK_SET);
37
14.2k
            return eol;
38
14.2k
        }
39
14.2k
    }
40
    // We did not necessarily find the end of the trailing newline sequence.
41
2.25k
    seek(last_offset, SEEK_SET);
42
2.25k
    findAndSkipNextEOL();
43
2.25k
    return eol;
44
16.5k
}
45
46
std::string
47
InputSource::readLine(size_t max_line_length)
48
16.5k
{
49
16.5k
    return read_line(max_line_length);
50
16.5k
}
51
52
inline std::string
53
InputSource::read_line(size_t count, qpdf_offset_t at)
54
16.5k
{
55
16.5k
    std::string result(count, '\0');
56
16.5k
    read_line(result, count, at);
57
16.5k
    return result;
58
16.5k
}
59
60
bool
61
InputSource::findFirst(char const* start_chars, qpdf_offset_t offset, size_t len, Finder& finder)
62
66.8k
{
63
    // Basic approach: search for the first character of start_chars starting from offset but not
64
    // going past len (if len != 0). Once the first character is found, see if it is the beginning
65
    // of a sequence of characters matching start_chars. If so, call finder.check() to do
66
    // caller-specific additional checks. If not, keep searching.
67
68
    // This code is tricky and highly subject to off-by-one or other edge case logic errors. See
69
    // comments throughout that explain how we're not missing any edge cases. There are also tests
70
    // specifically constructed to make sure we caught the edge cases in testing.
71
72
66.8k
    char buf[1025]; // size known to input_source.cc in libtests
73
    // To enable us to guarantee null-termination, save an extra byte so that buf[size] is valid
74
    // memory.
75
66.8k
    size_t size = sizeof(buf) - 1;
76
66.8k
    if ((strlen(start_chars) < 1) || (strlen(start_chars) > size)) {
77
0
        throw std::logic_error(
78
0
            "InputSource::findSource called with too small or too large of a character sequence");
79
0
    }
80
81
66.8k
    char* p = nullptr;
82
66.8k
    qpdf_offset_t buf_offset = offset;
83
66.8k
    size_t bytes_read = 0;
84
85
    // Guarantee that we return from this loop. Each time through, we either return, advance p, or
86
    // restart the loop with a condition that will cause return on the next pass. Eventually we will
87
    // either be out of range or hit EOF, either of which forces us to return.
88
2.61M
    while (true) {
89
        // Do we need to read more data? Pretend size = 5, buf starts at 0, and start_chars has 3
90
        // characters. buf[5] is valid and null. If p == 2, start_chars could be buf[2] through
91
        // buf[4], so p + strlen(start_chars) == buf + size is okay. If p points to buf[size], since
92
        // strlen(start_chars) is always >= 1, this overflow test will be correct for that case
93
        // regardless of start_chars.
94
2.61M
        if ((p == nullptr) || ((p + strlen(start_chars)) > (buf + bytes_read))) {
95
212k
            if (p) {
96
145k
                QTC::TC(
97
145k
                    "libtests", "InputSource read next block", ((p == buf + bytes_read) ? 0 : 1));
98
145k
                buf_offset += (p - buf);
99
145k
            }
100
212k
            this->seek(buf_offset, SEEK_SET);
101
            // Read into buffer and zero out the rest of the buffer including buf[size]. We
102
            // allocated an extra byte so that we could guarantee null termination as an extra
103
            // protection against overrun when using string functions.
104
212k
            bytes_read = this->read(buf, size);
105
212k
            if (bytes_read < strlen(start_chars)) {
106
30.3k
                QTC::TC("libtests", "InputSource find EOF", bytes_read == 0 ? 0 : 1);
107
30.3k
                return false;
108
30.3k
            }
109
182k
            memset(buf + bytes_read, '\0', 1 + (size - bytes_read));
110
182k
            p = buf;
111
182k
        }
112
113
        // Search for the first character.
114
2.58M
        if ((p = static_cast<char*>(
115
                 // line-break
116
2.58M
                 memchr(p, start_chars[0], bytes_read - QIntC::to_size(p - buf)))) != nullptr) {
117
2.45M
            if (p == buf) {
118
12.3k
                QTC::TC("libtests", "InputSource found match at buf[0]");
119
12.3k
            }
120
            // Found first letter.
121
2.45M
            if (len != 0) {
122
                // Make sure it's in range.
123
31.6k
                size_t p_relative_offset = QIntC::to_size((p - buf) + (buf_offset - offset));
124
31.6k
                if (p_relative_offset >= len) {
125
                    // out of range
126
1.68k
                    QTC::TC("libtests", "InputSource out of range");
127
1.68k
                    return false;
128
1.68k
                }
129
31.6k
            }
130
2.44M
            if ((p + strlen(start_chars)) > (buf + bytes_read)) {
131
                // If there are not enough bytes left in the file for start_chars, we will detect
132
                // this on the next pass as EOF and return.
133
6.77k
                QTC::TC("libtests", "InputSource not enough bytes");
134
6.77k
                continue;
135
6.77k
            }
136
137
            // See if p points to a sequence matching start_chars. We already checked above to make
138
            // sure we are not going to overrun memory.
139
2.44M
            if (strncmp(p, start_chars, strlen(start_chars)) == 0) {
140
                // Call finder.check() with the input source positioned to the point of the match.
141
55.7k
                this->seek(buf_offset + (p - buf), SEEK_SET);
142
55.7k
                if (finder.check()) {
143
34.8k
                    return true;
144
34.8k
                } else {
145
20.8k
                    QTC::TC("libtests", "InputSource start_chars matched but not check");
146
20.8k
                }
147
2.38M
            } else {
148
2.38M
                QTC::TC("libtests", "InputSource first char matched but not string");
149
2.38M
            }
150
            // This occurrence of the first character wasn't a match. Skip over it and keep
151
            // searching.
152
2.40M
            ++p;
153
2.40M
        } else {
154
            // Trigger reading the next block
155
136k
            p = buf + bytes_read;
156
136k
        }
157
2.58M
    }
158
8
    throw std::logic_error("InputSource after while (true)");
159
66.8k
}
160
161
bool
162
InputSource::findLast(char const* start_chars, qpdf_offset_t offset, size_t len, Finder& finder)
163
18.5k
{
164
18.5k
    bool found = false;
165
18.5k
    qpdf_offset_t after_found_offset = 0;
166
18.5k
    qpdf_offset_t cur_offset = offset;
167
18.5k
    size_t cur_len = len;
168
23.8k
    while (this->findFirst(start_chars, cur_offset, cur_len, finder)) {
169
5.24k
        if (found) {
170
506
            QTC::TC("libtests", "InputSource findLast found more than one");
171
4.73k
        } else {
172
4.73k
            found = true;
173
4.73k
        }
174
5.24k
        after_found_offset = this->tell();
175
5.24k
        cur_offset = after_found_offset;
176
5.24k
        cur_len = len - QIntC::to_size((cur_offset - offset));
177
5.24k
    }
178
18.5k
    if (found) {
179
4.73k
        this->seek(after_found_offset, SEEK_SET);
180
4.73k
    }
181
18.5k
    return found;
182
18.5k
}