Coverage Report

Created: 2024-05-20 06:28

/src/qpdf/libqpdf/InputSource.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/InputSource.hh>
2
3
#include <qpdf/QIntC.hh>
4
#include <qpdf/QTC.hh>
5
#include <cstring>
6
#include <stdexcept>
7
8
void
9
InputSource::setLastOffset(qpdf_offset_t offset)
10
114M
{
11
114M
    this->last_offset = offset;
12
114M
}
13
14
qpdf_offset_t
15
InputSource::getLastOffset() const
16
35.0M
{
17
35.0M
    return this->last_offset;
18
35.0M
}
19
20
std::string
21
InputSource::readLine(size_t max_line_length)
22
116k
{
23
    // Return at most max_line_length characters from the next line. Lines are terminated by one or
24
    // more \r or \n characters. Consume the trailing newline characters but don't return them.
25
    // After this is called, the file will be positioned after a line terminator or at the end of
26
    // the file, and last_offset will be the position the file had when this method was called.
27
28
116k
    qpdf_offset_t offset = this->tell();
29
116k
    auto bp = std::make_unique<char[]>(max_line_length + 1);
30
116k
    char* buf = bp.get();
31
116k
    memset(buf, '\0', max_line_length + 1);
32
116k
    this->read(buf, max_line_length);
33
116k
    this->seek(offset, SEEK_SET);
34
116k
    qpdf_offset_t eol = this->findAndSkipNextEOL();
35
116k
    this->last_offset = offset;
36
116k
    size_t line_length = QIntC::to_size(eol - offset);
37
116k
    if (line_length < max_line_length) {
38
115k
        buf[line_length] = '\0';
39
115k
    }
40
116k
    return {buf};
41
116k
}
42
43
bool
44
InputSource::findFirst(char const* start_chars, qpdf_offset_t offset, size_t len, Finder& finder)
45
170k
{
46
    // Basic approach: search for the first character of start_chars starting from offset but not
47
    // going past len (if len != 0). Once the first character is found, see if it is the beginning
48
    // of a sequence of characters matching start_chars. If so, call finder.check() to do
49
    // caller-specific additional checks. If not, keep searching.
50
51
    // This code is tricky and highly subject to off-by-one or other edge case logic errors. See
52
    // comments throughout that explain how we're not missing any edge cases. There are also tests
53
    // specifically constructed to make sure we caught the edge cases in testing.
54
55
170k
    char buf[1025]; // size known to input_source.cc in libtests
56
    // To enable us to guarantee null-termination, save an extra byte so that buf[size] is valid
57
    // memory.
58
170k
    size_t size = sizeof(buf) - 1;
59
170k
    if ((strlen(start_chars) < 1) || (strlen(start_chars) > size)) {
60
0
        throw std::logic_error("InputSource::findSource called with"
61
0
                               " too small or too large of a character sequence");
62
0
    }
63
64
170k
    char* p = nullptr;
65
170k
    qpdf_offset_t buf_offset = offset;
66
170k
    size_t bytes_read = 0;
67
68
    // Guarantee that we return from this loop. Each time through, we either return, advance p, or
69
    // restart the loop with a condition that will cause return on the next pass. Eventually we will
70
    // either be out of range or hit EOF, either of which forces us to return.
71
2.03M
    while (true) {
72
        // Do we need to read more data? Pretend size = 5, buf starts at 0, and start_chars has 3
73
        // characters. buf[5] is valid and null. If p == 2, start_chars could be buf[2] through
74
        // buf[4], so p + strlen(start_chars) == buf + size is okay. If p points to buf[size], since
75
        // strlen(start_chars) is always >= 1, this overflow test will be correct for that case
76
        // regardless of start_chars.
77
2.03M
        if ((p == nullptr) || ((p + strlen(start_chars)) > (buf + bytes_read))) {
78
636k
            if (p) {
79
465k
                QTC::TC(
80
465k
                    "libtests", "InputSource read next block", ((p == buf + bytes_read) ? 0 : 1));
81
465k
                buf_offset += (p - buf);
82
465k
            }
83
636k
            this->seek(buf_offset, SEEK_SET);
84
            // Read into buffer and zero out the rest of the buffer including buf[size]. We
85
            // allocated an extra byte so that we could guarantee null termination as an extra
86
            // protection against overrun when using string functions.
87
636k
            bytes_read = this->read(buf, size);
88
636k
            if (bytes_read < strlen(start_chars)) {
89
42.6k
                QTC::TC("libtests", "InputSource find EOF", bytes_read == 0 ? 0 : 1);
90
42.6k
                return false;
91
42.6k
            }
92
593k
            memset(buf + bytes_read, '\0', 1 + (size - bytes_read));
93
593k
            p = buf;
94
593k
        }
95
96
        // Search for the first character.
97
1.99M
        if ((p = static_cast<char*>(
98
                 // line-break
99
1.99M
                 memchr(p, start_chars[0], bytes_read - QIntC::to_size(p - buf)))) != nullptr) {
100
1.53M
            if (p == buf) {
101
31.5k
                QTC::TC("libtests", "InputSource found match at buf[0]");
102
31.5k
            }
103
            // Found first letter.
104
1.53M
            if (len != 0) {
105
                // Make sure it's in range.
106
81.5k
                size_t p_relative_offset = QIntC::to_size((p - buf) + (buf_offset - offset));
107
81.5k
                if (p_relative_offset >= len) {
108
                    // out of range
109
9.31k
                    QTC::TC("libtests", "InputSource out of range");
110
9.31k
                    return false;
111
9.31k
                }
112
81.5k
            }
113
1.52M
            if ((p + strlen(start_chars)) > (buf + bytes_read)) {
114
                // If there are not enough bytes left in the file for start_chars, we will detect
115
                // this on the next pass as EOF and return.
116
5.18k
                QTC::TC("libtests", "InputSource not enough bytes");
117
5.18k
                continue;
118
5.18k
            }
119
120
            // See if p points to a sequence matching start_chars. We already checked above to make
121
            // sure we are not going to overrun memory.
122
1.52M
            if (strncmp(p, start_chars, strlen(start_chars)) == 0) {
123
                // Call finder.check() with the input source positioned to the point of the match.
124
132k
                this->seek(buf_offset + (p - buf), SEEK_SET);
125
132k
                if (finder.check()) {
126
118k
                    return true;
127
118k
                } else {
128
14.0k
                    QTC::TC("libtests", "InputSource start_chars matched but not check");
129
14.0k
                }
130
1.38M
            } else {
131
1.38M
                QTC::TC("libtests", "InputSource first char matched but not string");
132
1.38M
            }
133
            // This occurrence of the first character wasn't a match. Skip over it and keep
134
            // searching.
135
1.40M
            ++p;
136
1.40M
        } else {
137
            // Trigger reading the next block
138
458k
            p = buf + bytes_read;
139
458k
        }
140
1.99M
    }
141
0
    throw std::logic_error("InputSource after while (true)");
142
170k
}
143
144
bool
145
InputSource::findLast(char const* start_chars, qpdf_offset_t offset, size_t len, Finder& finder)
146
36.5k
{
147
36.5k
    bool found = false;
148
36.5k
    qpdf_offset_t after_found_offset = 0;
149
36.5k
    qpdf_offset_t cur_offset = offset;
150
36.5k
    size_t cur_len = len;
151
60.4k
    while (this->findFirst(start_chars, cur_offset, cur_len, finder)) {
152
23.9k
        if (found) {
153
1.07k
            QTC::TC("libtests", "InputSource findLast found more than one");
154
22.8k
        } else {
155
22.8k
            found = true;
156
22.8k
        }
157
23.9k
        after_found_offset = this->tell();
158
23.9k
        cur_offset = after_found_offset;
159
23.9k
        cur_len = len - QIntC::to_size((cur_offset - offset));
160
23.9k
    }
161
36.5k
    if (found) {
162
22.8k
        this->seek(after_found_offset, SEEK_SET);
163
22.8k
    }
164
36.5k
    return found;
165
36.5k
}