Coverage Report

Created: 2025-12-05 06:55

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/InputSource.cc
Line
Count
Source
1
#include <qpdf/InputSource_private.hh>
2
3
#include <qpdf/QIntC.hh>
4
#include <qpdf/QTC.hh>
5
#include <qpdf/Util.hh>
6
7
#include <cstring>
8
#include <stdexcept>
9
10
using namespace std::literals;
11
using namespace qpdf;
12
13
void
14
InputSource::setLastOffset(qpdf_offset_t offset)
15
16.0M
{
16
16.0M
    this->last_offset = offset;
17
16.0M
}
18
19
// Report the currently stored last_offset value without modifying the input source.
qpdf_offset_t
InputSource::getLastOffset() const
{
    return last_offset;
}
24
25
size_t
26
InputSource::read_line(std::string& str, size_t count, qpdf_offset_t at)
27
14.7k
{
28
    // Return at most max_line_length characters from the next line. Lines are terminated by one or
29
    // more \r or \n characters. Consume the trailing newline characters but don't return them.
30
    // After this is called, the file will be positioned after a line terminator or at the end of
31
    // the file, and last_offset will point to position the file had when this method was called.
32
33
14.7k
    read(str, count, at);
34
14.7k
    auto eol = str.find_first_of("\n\r"sv);
35
14.7k
    if (eol != std::string::npos) {
36
11.6k
        auto next_line = str.find_first_not_of("\n\r"sv, eol);
37
11.6k
        str.resize(eol);
38
11.6k
        if (eol != std::string::npos) {
39
11.6k
            seek(last_offset + static_cast<qpdf_offset_t>(next_line), SEEK_SET);
40
11.6k
            return eol;
41
11.6k
        }
42
11.6k
    }
43
    // We did not necessarily find the end of the trailing newline sequence.
44
3.14k
    seek(last_offset, SEEK_SET);
45
3.14k
    findAndSkipNextEOL();
46
3.14k
    return eol;
47
14.7k
}
48
49
std::string
50
InputSource::readLine(size_t max_line_length)
51
14.7k
{
52
14.7k
    return read_line(max_line_length);
53
14.7k
}
54
55
inline std::string
56
InputSource::read_line(size_t count, qpdf_offset_t at)
57
14.7k
{
58
14.7k
    std::string result(count, '\0');
59
14.7k
    read_line(result, count, at);
60
14.7k
    return result;
61
14.7k
}
62
63
bool
64
InputSource::findFirst(char const* start_chars, qpdf_offset_t offset, size_t len, Finder& finder)
65
83.3k
{
66
    // Basic approach: search for the first character of start_chars starting from offset but not
67
    // going past len (if len != 0). Once the first character is found, see if it is the beginning
68
    // of a sequence of characters matching start_chars. If so, call finder.check() to do
69
    // caller-specific additional checks. If not, keep searching.
70
71
    // This code is tricky and highly subject to off-by-one or other edge case logic errors. See
72
    // comments throughout that explain how we're not missing any edge cases. There are also tests
73
    // specifically constructed to make sure we caught the edge cases in testing.
74
75
83.3k
    char buf[1025]; // size known to input_source.cc in libtests
76
    // To enable us to guarantee null-termination, save an extra byte so that buf[size] is valid
77
    // memory.
78
83.3k
    size_t size = sizeof(buf) - 1;
79
83.3k
    util::assertion(
80
83.3k
        !(strlen(start_chars) < 1 || strlen(start_chars) > size),
81
83.3k
        "InputSource::findSource called with too small or too large of a character sequence" //
82
83.3k
    );
83
84
83.3k
    char* p = nullptr;
85
83.3k
    qpdf_offset_t buf_offset = offset;
86
83.3k
    size_t bytes_read = 0;
87
88
    // Guarantee that we return from this loop. Each time through, we either return, advance p, or
89
    // restart the loop with a condition that will cause return on the next pass. Eventually we will
90
    // either be out of range or hit EOF, either of which forces us to return.
91
2.45M
    while (true) {
92
        // Do we need to read more data? Pretend size = 5, buf starts at 0, and start_chars has 3
93
        // characters. buf[5] is valid and null. If p == 2, start_chars could be buf[2] through
94
        // buf[4], so p + strlen(start_chars) == buf + size is okay. If p points to buf[size], since
95
        // strlen(start_chars) is always >= 1, this overflow test will be correct for that case
96
        // regardless of start_chars.
97
2.45M
        if ((p == nullptr) || ((p + strlen(start_chars)) > (buf + bytes_read))) {
98
234k
            if (p) {
99
150k
                QTC::TC(
100
150k
                    "libtests", "InputSource read next block", ((p == buf + bytes_read) ? 0 : 1));
101
150k
                buf_offset += (p - buf);
102
150k
            }
103
234k
            this->seek(buf_offset, SEEK_SET);
104
            // Read into buffer and zero out the rest of the buffer including buf[size]. We
105
            // allocated an extra byte so that we could guarantee null termination as an extra
106
            // protection against overrun when using string functions.
107
234k
            bytes_read = this->read(buf, size);
108
234k
            if (bytes_read < strlen(start_chars)) {
109
42.8k
                QTC::TC("libtests", "InputSource find EOF", bytes_read == 0 ? 0 : 1);
110
42.8k
                return false;
111
42.8k
            }
112
191k
            memset(buf + bytes_read, '\0', 1 + (size - bytes_read));
113
191k
            p = buf;
114
191k
        }
115
116
        // Search for the first character.
117
2.41M
        if ((p = static_cast<char*>(
118
                 // line-break
119
2.41M
                 memchr(p, start_chars[0], bytes_read - QIntC::to_size(p - buf)))) != nullptr) {
120
2.26M
            if (p == buf) {
121
6.85k
                QTC::TC("libtests", "InputSource found match at buf[0]");
122
6.85k
            }
123
            // Found first letter.
124
2.26M
            if (len != 0) {
125
                // Make sure it's in range.
126
28.4k
                size_t p_relative_offset = QIntC::to_size((p - buf) + (buf_offset - offset));
127
28.4k
                if (p_relative_offset >= len) {
128
                    // out of range
129
2.02k
                    QTC::TC("libtests", "InputSource out of range");
130
2.02k
                    return false;
131
2.02k
                }
132
28.4k
            }
133
2.26M
            if ((p + strlen(start_chars)) > (buf + bytes_read)) {
134
                // If there are not enough bytes left in the file for start_chars, we will detect
135
                // this on the next pass as EOF and return.
136
3.04k
                QTC::TC("libtests", "InputSource not enough bytes");
137
3.04k
                continue;
138
3.04k
            }
139
140
            // See if p points to a sequence matching start_chars. We already checked above to make
141
            // sure we are not going to overrun memory.
142
2.26M
            if (strncmp(p, start_chars, strlen(start_chars)) == 0) {
143
                // Call finder.check() with the input source positioned to the point of the match.
144
159k
                this->seek(buf_offset + (p - buf), SEEK_SET);
145
159k
                if (finder.check()) {
146
38.5k
                    return true;
147
120k
                } else {
148
120k
                    QTC::TC("libtests", "InputSource start_chars matched but not check");
149
120k
                }
150
2.10M
            } else {
151
2.10M
                QTC::TC("libtests", "InputSource first char matched but not string");
152
2.10M
            }
153
            // This occurrence of the first character wasn't a match. Skip over it and keep
154
            // searching.
155
2.22M
            ++p;
156
2.22M
        } else {
157
            // Trigger reading the next block
158
145k
            p = buf + bytes_read;
159
145k
        }
160
2.41M
    }
161
83.3k
}
162
163
bool
164
InputSource::findLast(char const* start_chars, qpdf_offset_t offset, size_t len, Finder& finder)
165
23.0k
{
166
23.0k
    bool found = false;
167
23.0k
    qpdf_offset_t after_found_offset = 0;
168
23.0k
    qpdf_offset_t cur_offset = offset;
169
23.0k
    size_t cur_len = len;
170
29.2k
    while (this->findFirst(start_chars, cur_offset, cur_len, finder)) {
171
6.22k
        if (found) {
172
353
            QTC::TC("libtests", "InputSource findLast found more than one");
173
5.87k
        } else {
174
5.87k
            found = true;
175
5.87k
        }
176
6.22k
        after_found_offset = this->tell();
177
6.22k
        cur_offset = after_found_offset;
178
6.22k
        cur_len = len - QIntC::to_size((cur_offset - offset));
179
6.22k
    }
180
23.0k
    if (found) {
181
        this->seek(after_found_offset, SEEK_SET);
182
5.87k
    }
183
23.0k
    return found;
184
23.0k
}