/src/sleuthkit/tsk/img/img_io.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Brian Carrier [carrier <at> sleuthkit [dot] org] |
3 | | * Copyright (c) 2011 Brian Carrier. All Rights reserved |
4 | | * |
5 | | * This software is distributed under the Common Public License 1.0 |
6 | | */ |
7 | | |
8 | | /** |
9 | | * \file img_io.cpp |
10 | | * Contains the basic img reading API redirection functions. |
11 | | */ |
12 | | |
13 | | #include "tsk_img_i.h" |
14 | | #include "legacy_cache.h" |
15 | | |
16 | | #include <chrono> |
17 | | #include <memory> |
18 | | #include <new> |
19 | | |
/**
 * Minimal stopwatch: call start(), then stop(), then read elapsed().
 *
 * The interval is measured with std::chrono::steady_clock so that wall-clock
 * adjustments cannot produce a negative interval between start() and stop().
 */
class Timer {
  using Clock = std::chrono::steady_clock;

public:
  /**
   * @returns Nanoseconds between the most recent start() and the most
   * recent stop(). Returns 0 if the recorded stop time does not lie after
   * the recorded start time (e.g., stop() has not been called since the
   * last start()), instead of letting a negative tick count wrap around
   * when converted to size_t.
   */
  size_t elapsed() const {
    const auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
      stop_time - start_time
    ).count();
    return ns > 0 ? static_cast<size_t>(ns) : 0;
  }

  /** Records the beginning of the interval. */
  void start() noexcept {
    start_time = Clock::now();
  }

  /** Records the end of the interval. */
  void stop() noexcept {
    stop_time = Clock::now();
  }

private:
  // Both default to the clock's epoch, so elapsed() is 0 on a fresh Timer.
  Clock::time_point start_time{}, stop_time{};
};
38 | | |
39 | | // This function assumes that we hold the cache_lock even though we're not modyfying |
40 | | // the cache. This is because the lower-level read callbacks make the same assumption. |
41 | | static ssize_t img_read_no_cache(TSK_IMG_INFO * a_img_info, TSK_OFF_T a_off, |
42 | | char *a_buf, size_t a_len) |
43 | 64.5k | { |
44 | 64.5k | ssize_t nbytes; |
45 | | |
46 | 64.5k | IMG_INFO* iif = reinterpret_cast<IMG_INFO*>(a_img_info); |
47 | | |
48 | | /* Some of the lower-level methods like block-sized reads. |
49 | | * So if the len is not that multiple, then make it. */ |
50 | 64.5k | if (a_img_info->sector_size > 0 && a_len % a_img_info->sector_size) { |
51 | 61.4k | size_t len_tmp; |
52 | 61.4k | len_tmp = roundup(a_len, a_img_info->sector_size); |
53 | | |
54 | 61.4k | std::unique_ptr<char[]> buf2(new(std::nothrow) char[len_tmp]); |
55 | 61.4k | if (!buf2) { |
56 | 0 | return -1; |
57 | 0 | } |
58 | | |
59 | 61.4k | nbytes = iif->read(a_img_info, a_off, buf2.get(), len_tmp); |
60 | 61.4k | if (nbytes < 0) { |
61 | 0 | return -1; |
62 | 0 | } |
63 | | |
64 | 61.4k | if (nbytes < (ssize_t) a_len) { |
65 | 57.0k | memcpy(a_buf, buf2.get(), nbytes); |
66 | 57.0k | } |
67 | 4.48k | else { |
68 | 4.48k | memcpy(a_buf, buf2.get(), a_len); |
69 | 4.48k | nbytes = (ssize_t)a_len; |
70 | 4.48k | } |
71 | 61.4k | } |
72 | 3.02k | else { |
73 | 3.02k | nbytes = iif->read(a_img_info, a_off, a_buf, a_len); |
74 | 3.02k | } |
75 | | |
76 | 64.5k | return nbytes; |
77 | 64.5k | } |
78 | | |
79 | | ssize_t tsk_img_read_no_cache( |
80 | | TSK_IMG_INFO* a_img_info, |
81 | | TSK_OFF_T a_off, |
82 | | char* a_buf, |
83 | | size_t a_len) |
84 | 0 | { |
85 | 0 | IMG_INFO* iif = reinterpret_cast<IMG_INFO*>(a_img_info); |
86 | |
|
87 | 0 | Timer timer; |
88 | 0 | Stats& stats = iif->stats; |
89 | |
|
90 | 0 | ssize_t read_count = 0; |
91 | |
|
92 | 0 | auto cache = static_cast<LegacyCache*>(iif->cache); |
93 | 0 | cache->lock(); |
94 | 0 | timer.start(); |
95 | 0 | read_count = img_read_no_cache(a_img_info, a_off, a_buf, a_len); |
96 | 0 | timer.stop(); |
97 | 0 | stats.miss_ns += timer.elapsed(); |
98 | 0 | ++stats.misses; |
99 | 0 | stats.miss_bytes += read_count; |
100 | 0 | cache->unlock(); |
101 | |
|
102 | 0 | return read_count; |
103 | 0 | } |
104 | | |
105 | | ssize_t |
106 | | tsk_img_read_legacy( |
107 | | TSK_IMG_INFO* a_img_info, |
108 | | TSK_OFF_T a_off, |
109 | | char* a_buf, |
110 | | size_t a_len) |
111 | 13.3M | { |
112 | 13.3M | IMG_INFO* iif = reinterpret_cast<IMG_INFO*>(a_img_info); |
113 | | |
114 | 13.3M | Timer timer; |
115 | 13.3M | Stats& stats = iif->stats; |
116 | | |
117 | 15.3M | #define CACHE_AGE 1000 |
118 | 13.3M | ssize_t read_count = 0; |
119 | | |
120 | | /* cache_lock is used for both the cache in IMG_INFO and |
121 | | * the shared variables in the img type specific INFO structs. |
122 | | * grab it now so that it is held before any reads. |
123 | | */ |
124 | 13.3M | auto cache = static_cast<LegacyCache*>(iif->cache); |
125 | 13.3M | cache->lock(); |
126 | | |
127 | | // if they ask for more than the cache length, skip the cache |
128 | 13.3M | if (a_len + (a_off % 512) > TSK_IMG_INFO_CACHE_LEN) { |
129 | 64.5k | timer.start(); |
130 | 64.5k | read_count = img_read_no_cache(a_img_info, a_off, a_buf, a_len); |
131 | 64.5k | timer.stop(); |
132 | 64.5k | stats.miss_ns += timer.elapsed(); |
133 | 64.5k | ++stats.misses; |
134 | 64.5k | stats.miss_bytes += read_count; |
135 | 64.5k | cache->unlock(); |
136 | 64.5k | return read_count; |
137 | 64.5k | } |
138 | | |
139 | | /* See if the requested length is going to be too long. |
140 | | * we'll use this length when checking the cache. */ |
141 | 13.2M | size_t len2 = a_len; |
142 | | |
143 | | // Protect against INT64_MAX + INT64_MAX > value |
144 | 13.2M | if ((TSK_OFF_T) len2 > a_img_info->size |
145 | 13.2M | || a_off >= a_img_info->size - (TSK_OFF_T)len2) { |
146 | 318k | len2 = (size_t) (a_img_info->size - a_off); |
147 | 318k | } |
148 | | |
149 | 13.2M | int cache_next = 0; // index to lowest age cache (to use next) |
150 | | |
151 | 13.2M | timer.start(); |
152 | | |
153 | | // check if it is in the cache |
154 | 438M | for (int cache_index = 0; cache_index < TSK_IMG_INFO_CACHE_NUM; cache_index++) { |
155 | | |
156 | | // Look into the in-use cache entries |
157 | 425M | if (cache->cache_len[cache_index] > 0) { |
158 | | |
159 | | // the read_count check makes sure we don't go back in after data was read |
160 | 68.7M | if (read_count == 0 |
161 | 68.7M | && cache->cache_off[cache_index] <= a_off |
162 | 68.7M | && cache->cache_off[cache_index] + |
163 | 16.3M | cache->cache_len[cache_index] >= a_off + len2) { |
164 | | |
165 | | /* |
166 | | if (tsk_verbose) |
167 | | fprintf(stderr, |
168 | | "tsk_img_read: Read found in cache %d\n", cache_index ); |
169 | | */ |
170 | | |
171 | | // We found it... |
172 | 15.2M | memcpy(a_buf, |
173 | 15.2M | &cache->cache[cache_index][a_off - |
174 | 15.2M | cache->cache_off[cache_index]], len2); |
175 | 15.2M | read_count = (ssize_t) len2; |
176 | | |
177 | | // reset its "age" since it was useful |
178 | 15.2M | cache->cache_age[cache_index] = CACHE_AGE; |
179 | | |
180 | | // we don't break out of the loop so that we update all ages |
181 | | |
182 | 15.2M | ++stats.hits; |
183 | 15.2M | stats.hit_bytes += read_count; |
184 | 15.2M | } |
185 | 53.5M | else { |
186 | | /* decrease its "age" since it was not useful. |
187 | | * We don't let used ones go below 1 so that they are not |
188 | | * confused with entries that have never been used. */ |
189 | 53.5M | cache->cache_age[cache_index]--; |
190 | | |
191 | | // see if this is the most eligible replacement |
192 | 53.5M | if (cache->cache_len[cache_next] > 0 |
193 | 53.5M | && cache->cache_age[cache_index] < |
194 | 13.6M | cache->cache_age[cache_next]) |
195 | 1.11M | cache_next = cache_index; |
196 | 53.5M | } |
197 | 68.7M | } |
198 | 356M | else { |
199 | 356M | cache_next = cache_index; |
200 | 356M | } |
201 | 425M | } |
202 | | |
203 | | // if we didn't find it, then load it into the cache_next entry |
204 | 13.2M | if (read_count == 0) { |
205 | 120k | timer.start(); |
206 | | |
207 | 120k | size_t read_size = 0; |
208 | | |
209 | | // round the offset down to a sector boundary |
210 | 120k | cache->cache_off[cache_next] = (a_off / 512) * 512; |
211 | | |
212 | | /* |
213 | | if (tsk_verbose) |
214 | | fprintf(stderr, |
215 | | "tsk_img_read: Loading data into cache %d (%" PRIdOFF |
216 | | ")\n", cache_next, a_img_info->cache_off[cache_next]); |
217 | | */ |
218 | | |
219 | | // Read a full cache block or the remaining data. |
220 | 120k | read_size = TSK_IMG_INFO_CACHE_LEN; |
221 | | |
222 | 120k | if (cache->cache_off[cache_next] + (TSK_OFF_T)read_size > |
223 | 120k | a_img_info->size) { |
224 | 98.1k | read_size = |
225 | 98.1k | (size_t) (a_img_info->size - |
226 | 98.1k | cache->cache_off[cache_next]); |
227 | 98.1k | } |
228 | | |
229 | 120k | read_count = iif->read(a_img_info, |
230 | 120k | cache->cache_off[cache_next], |
231 | 120k | cache->cache[cache_next], read_size); |
232 | | |
233 | | // if no error, then set the variables and copy the data |
234 | | // Although a read_count of -1 indicates an error, |
235 | | // since read_count is used in the calculation it may not be negative. |
236 | | // Also it does not make sense to copy data when the read_count is 0. |
237 | 120k | if (read_count > 0) { |
238 | | |
239 | 120k | TSK_OFF_T rel_off = 0; |
240 | 120k | cache->cache_age[cache_next] = CACHE_AGE; |
241 | 120k | cache->cache_len[cache_next] = read_count; |
242 | | |
243 | | // Determine the offset relative to the start of the cached data. |
244 | 120k | rel_off = a_off - cache->cache_off[cache_next]; |
245 | | |
246 | | // Make sure we were able to read sufficient data into the cache. |
247 | 120k | if (rel_off > (TSK_OFF_T) read_count) { |
248 | 0 | len2 = 0; |
249 | 0 | } |
250 | | // Make sure not to copy more than is available in the cache. |
251 | 120k | else if (rel_off + (TSK_OFF_T) len2 > (TSK_OFF_T) read_count) { |
252 | 0 | len2 = (size_t) (read_count - rel_off); |
253 | 0 | } |
254 | | // Only copy data when we have something to copy. |
255 | 120k | if (len2 > 0) { |
256 | 49.6k | memcpy(a_buf, &(cache->cache[cache_next][rel_off]), len2); |
257 | 49.6k | } |
258 | 120k | read_count = (ssize_t) len2; |
259 | 120k | } |
260 | 0 | else { |
261 | 0 | cache->cache_len[cache_next] = 0; |
262 | 0 | cache->cache_age[cache_next] = 0; |
263 | 0 | cache->cache_off[cache_next] = 0; |
264 | | |
265 | | // Something went wrong so let's try skipping the cache |
266 | 0 | read_count = img_read_no_cache(a_img_info, a_off, a_buf, a_len); |
267 | 0 | } |
268 | | |
269 | 120k | timer.stop(); |
270 | 120k | stats.miss_ns += timer.elapsed(); |
271 | 120k | ++stats.misses; |
272 | 120k | stats.miss_bytes += read_count; |
273 | 120k | } |
274 | 13.1M | else { |
275 | 13.1M | timer.stop(); |
276 | 13.1M | stats.hit_ns += timer.elapsed(); |
277 | 13.1M | } |
278 | | |
279 | 13.2M | cache->unlock(); |
280 | 13.2M | return read_count; |
281 | 13.3M | } |
282 | | |
283 | | /** |
284 | | * \ingroup imglib |
285 | | * Reads data from an open disk image |
286 | | * @param a_img_info Disk image to read from |
287 | | * @param a_off Byte offset to start reading from |
288 | | * @param a_buf Buffer to read into |
289 | | * @param a_len Number of bytes to read into buffer |
290 | | * @returns -1 on error or number of bytes read |
291 | | */ |
292 | | ssize_t |
293 | | tsk_img_read(TSK_IMG_INFO * a_img_info, TSK_OFF_T a_off, |
294 | | char *a_buf, size_t a_len) |
295 | 13.6M | { |
296 | 13.6M | if (a_img_info == NULL) { |
297 | 0 | tsk_error_reset(); |
298 | 0 | tsk_error_set_errno(TSK_ERR_IMG_ARG); |
299 | 0 | tsk_error_set_errstr("tsk_img_read: a_img_info: NULL"); |
300 | 0 | return -1; |
301 | 0 | } |
302 | | |
303 | | // Do not allow a_buf to be NULL. |
304 | 13.6M | if (a_buf == NULL) { |
305 | 0 | tsk_error_reset(); |
306 | 0 | tsk_error_set_errno(TSK_ERR_IMG_ARG); |
307 | 0 | tsk_error_set_errstr("tsk_img_read: a_buf: NULL"); |
308 | 0 | return -1; |
309 | 0 | } |
310 | | |
311 | | // The function cannot handle negative offsets. |
312 | 13.6M | if (a_off < 0) { |
313 | 38.9k | tsk_error_reset(); |
314 | 38.9k | tsk_error_set_errno(TSK_ERR_IMG_ARG); |
315 | 38.9k | tsk_error_set_errstr("tsk_img_read: a_off: %" PRIdOFF, a_off); |
316 | 38.9k | return -1; |
317 | 38.9k | } |
318 | | |
319 | | // TODO: why not just return 0 here (and be POSIX compliant)? |
320 | 13.5M | if (a_off >= a_img_info->size) { |
321 | 238k | tsk_error_reset(); |
322 | 238k | tsk_error_set_errno(TSK_ERR_IMG_READ_OFF); |
323 | 238k | tsk_error_set_errstr("tsk_img_read - %" PRIdOFF, a_off); |
324 | 238k | return -1; |
325 | 238k | } |
326 | | |
327 | | // FIXME: This check is ridiculous. It will fail only when you pass |
328 | | // in a buffer length that won't fit into 63 bits. You cannot allocate |
329 | | // a buffer that size, and anyway this is here only because no one was |
330 | | // sufficiently careful about the arithmetic below to avoid overflow. |
331 | | // The correct solution is to fix the arithemetic. |
332 | | // |
333 | | // Protect a_off against overflowing when a_len is added since TSK_OFF_T |
334 | | // maps to an int64 we prefer it over size_t although likely checking |
335 | | // for ( a_len > SSIZE_MAX ) is better but the code does not seem to |
336 | | // use that approach. |
337 | | |
338 | 13.3M | if ((TSK_OFF_T) a_len < 0) { |
339 | 0 | tsk_error_reset(); |
340 | 0 | tsk_error_set_errno(TSK_ERR_IMG_ARG); |
341 | 0 | tsk_error_set_errstr("tsk_img_read: a_len: %" PRIuSIZE, a_len); |
342 | 0 | return -1; |
343 | 0 | } |
344 | | |
345 | 13.3M | return reinterpret_cast<IMG_INFO*>(a_img_info)->cache_read(a_img_info, a_off, a_buf, a_len); |
346 | 13.3M | } |