/src/qpdf/libqpdf/InputSource.cc
Line | Count | Source |
1 | | #include <qpdf/InputSource_private.hh> |
2 | | |
3 | | #include <qpdf/QIntC.hh> |
4 | | #include <qpdf/QTC.hh> |
5 | | #include <qpdf/Util.hh> |
6 | | |
7 | | #include <cstring> |
8 | | #include <stdexcept> |
9 | | |
10 | | using namespace std::literals; |
11 | | using namespace qpdf; |
12 | | |
13 | | void |
14 | | InputSource::setLastOffset(qpdf_offset_t offset) |
15 | 16.0M | { |
16 | 16.0M | this->last_offset = offset; |
17 | 16.0M | } |
18 | | |
19 | | qpdf_offset_t |
20 | | InputSource::getLastOffset() const |
21 | 11.0M | { |
22 | 11.0M | return this->last_offset; |
23 | 11.0M | } |
24 | | |
25 | | size_t |
26 | | InputSource::read_line(std::string& str, size_t count, qpdf_offset_t at) |
27 | 14.7k | { |
28 | | // Return at most max_line_length characters from the next line. Lines are terminated by one or |
29 | | // more \r or \n characters. Consume the trailing newline characters but don't return them. |
30 | | // After this is called, the file will be positioned after a line terminator or at the end of |
31 | | // the file, and last_offset will point to position the file had when this method was called. |
32 | | |
33 | 14.7k | read(str, count, at); |
34 | 14.7k | auto eol = str.find_first_of("\n\r"sv); |
35 | 14.7k | if (eol != std::string::npos) { |
36 | 11.6k | auto next_line = str.find_first_not_of("\n\r"sv, eol); |
37 | 11.6k | str.resize(eol); |
38 | 11.6k | if (eol != std::string::npos) { |
39 | 11.6k | seek(last_offset + static_cast<qpdf_offset_t>(next_line), SEEK_SET); |
40 | 11.6k | return eol; |
41 | 11.6k | } |
42 | 11.6k | } |
43 | | // We did not necessarily find the end of the trailing newline sequence. |
44 | 3.14k | seek(last_offset, SEEK_SET); |
45 | 3.14k | findAndSkipNextEOL(); |
46 | 3.14k | return eol; |
47 | 14.7k | } |
48 | | |
49 | | std::string |
50 | | InputSource::readLine(size_t max_line_length) |
51 | 14.7k | { |
52 | 14.7k | return read_line(max_line_length); |
53 | 14.7k | } |
54 | | |
55 | | inline std::string |
56 | | InputSource::read_line(size_t count, qpdf_offset_t at) |
57 | 14.7k | { |
58 | 14.7k | std::string result(count, '\0'); |
59 | 14.7k | read_line(result, count, at); |
60 | 14.7k | return result; |
61 | 14.7k | } |
62 | | |
63 | | bool |
64 | | InputSource::findFirst(char const* start_chars, qpdf_offset_t offset, size_t len, Finder& finder) |
65 | 83.3k | { |
66 | | // Basic approach: search for the first character of start_chars starting from offset but not |
67 | | // going past len (if len != 0). Once the first character is found, see if it is the beginning |
68 | | // of a sequence of characters matching start_chars. If so, call finder.check() to do |
69 | | // caller-specific additional checks. If not, keep searching. |
70 | | |
71 | | // This code is tricky and highly subject to off-by-one or other edge case logic errors. See |
72 | | // comments throughout that explain how we're not missing any edge cases. There are also tests |
73 | | // specifically constructed to make sure we caught the edge cases in testing. |
74 | | |
75 | 83.3k | char buf[1025]; // size known to input_source.cc in libtests |
76 | | // To enable us to guarantee null-termination, save an extra byte so that buf[size] is valid |
77 | | // memory. |
78 | 83.3k | size_t size = sizeof(buf) - 1; |
79 | 83.3k | util::assertion( |
80 | 83.3k | !(strlen(start_chars) < 1 || strlen(start_chars) > size), |
81 | 83.3k | "InputSource::findSource called with too small or too large of a character sequence" // |
82 | 83.3k | ); |
83 | | |
84 | 83.3k | char* p = nullptr; |
85 | 83.3k | qpdf_offset_t buf_offset = offset; |
86 | 83.3k | size_t bytes_read = 0; |
87 | | |
88 | | // Guarantee that we return from this loop. Each time through, we either return, advance p, or |
89 | | // restart the loop with a condition that will cause return on the next pass. Eventually we will |
90 | | // either be out of range or hit EOF, either of which forces us to return. |
91 | 2.45M | while (true) { |
92 | | // Do we need to read more data? Pretend size = 5, buf starts at 0, and start_chars has 3 |
93 | | // characters. buf[5] is valid and null. If p == 2, start_chars could be buf[2] through |
94 | | // buf[4], so p + strlen(start_chars) == buf + size is okay. If p points to buf[size], since |
95 | | // strlen(start_chars) is always >= 1, this overflow test will be correct for that case |
96 | | // regardless of start_chars. |
97 | 2.45M | if ((p == nullptr) || ((p + strlen(start_chars)) > (buf + bytes_read))) { |
98 | 234k | if (p) { |
99 | 150k | QTC::TC( |
100 | 150k | "libtests", "InputSource read next block", ((p == buf + bytes_read) ? 0 : 1)); |
101 | 150k | buf_offset += (p - buf); |
102 | 150k | } |
103 | 234k | this->seek(buf_offset, SEEK_SET); |
104 | | // Read into buffer and zero out the rest of the buffer including buf[size]. We |
105 | | // allocated an extra byte so that we could guarantee null termination as an extra |
106 | | // protection against overrun when using string functions. |
107 | 234k | bytes_read = this->read(buf, size); |
108 | 234k | if (bytes_read < strlen(start_chars)) { |
109 | 42.8k | QTC::TC("libtests", "InputSource find EOF", bytes_read == 0 ? 0 : 1); |
110 | 42.8k | return false; |
111 | 42.8k | } |
112 | 191k | memset(buf + bytes_read, '\0', 1 + (size - bytes_read)); |
113 | 191k | p = buf; |
114 | 191k | } |
115 | | |
116 | | // Search for the first character. |
117 | 2.41M | if ((p = static_cast<char*>( |
118 | | // line-break |
119 | 2.41M | memchr(p, start_chars[0], bytes_read - QIntC::to_size(p - buf)))) != nullptr) { |
120 | 2.26M | if (p == buf) { |
121 | 6.85k | QTC::TC("libtests", "InputSource found match at buf[0]"); |
122 | 6.85k | } |
123 | | // Found first letter. |
124 | 2.26M | if (len != 0) { |
125 | | // Make sure it's in range. |
126 | 28.4k | size_t p_relative_offset = QIntC::to_size((p - buf) + (buf_offset - offset)); |
127 | 28.4k | if (p_relative_offset >= len) { |
128 | | // out of range |
129 | 2.02k | QTC::TC("libtests", "InputSource out of range"); |
130 | 2.02k | return false; |
131 | 2.02k | } |
132 | 28.4k | } |
133 | 2.26M | if ((p + strlen(start_chars)) > (buf + bytes_read)) { |
134 | | // If there are not enough bytes left in the file for start_chars, we will detect |
135 | | // this on the next pass as EOF and return. |
136 | 3.04k | QTC::TC("libtests", "InputSource not enough bytes"); |
137 | 3.04k | continue; |
138 | 3.04k | } |
139 | | |
140 | | // See if p points to a sequence matching start_chars. We already checked above to make |
141 | | // sure we are not going to overrun memory. |
142 | 2.26M | if (strncmp(p, start_chars, strlen(start_chars)) == 0) { |
143 | | // Call finder.check() with the input source positioned to the point of the match. |
144 | 159k | this->seek(buf_offset + (p - buf), SEEK_SET); |
145 | 159k | if (finder.check()) { |
146 | 38.5k | return true; |
147 | 120k | } else { |
148 | 120k | QTC::TC("libtests", "InputSource start_chars matched but not check"); |
149 | 120k | } |
150 | 2.10M | } else { |
151 | 2.10M | QTC::TC("libtests", "InputSource first char matched but not string"); |
152 | 2.10M | } |
153 | | // This occurrence of the first character wasn't a match. Skip over it and keep |
154 | | // searching. |
155 | 2.22M | ++p; |
156 | 2.22M | } else { |
157 | | // Trigger reading the next block |
158 | 145k | p = buf + bytes_read; |
159 | 145k | } |
160 | 2.41M | } |
161 | 83.3k | } |
162 | | |
163 | | bool |
164 | | InputSource::findLast(char const* start_chars, qpdf_offset_t offset, size_t len, Finder& finder) |
165 | 23.0k | { |
166 | 23.0k | bool found = false; |
167 | 23.0k | qpdf_offset_t after_found_offset = 0; |
168 | 23.0k | qpdf_offset_t cur_offset = offset; |
169 | 23.0k | size_t cur_len = len; |
170 | 29.2k | while (this->findFirst(start_chars, cur_offset, cur_len, finder)) { |
171 | 6.22k | if (found) { |
172 | 353 | QTC::TC("libtests", "InputSource findLast found more than one"); |
173 | 5.87k | } else { |
174 | 5.87k | found = true; |
175 | 5.87k | } |
176 | 6.22k | after_found_offset = this->tell(); |
177 | 6.22k | cur_offset = after_found_offset; |
178 | 6.22k | cur_len = len - QIntC::to_size((cur_offset - offset)); |
179 | 6.22k | } |
180 | 23.0k | if (found) { |
181 | | this->seek(after_found_offset, SEEK_SET); |
182 | 5.87k | } |
183 | 23.0k | return found; |
184 | 23.0k | } |