/src/e2fsprogs/lib/ext2fs/unix_io.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * unix_io.c --- This is the Unix (well, really POSIX) implementation |
3 | | * of the I/O manager. |
4 | | * |
5 | | * Implements a one-block write-through cache. |
6 | | * |
7 | | * Includes support for Windows NT under Cygwin. |
8 | | * |
9 | | * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, |
10 | | * 2002 by Theodore Ts'o. |
11 | | * |
12 | | * %Begin-Header% |
13 | | * This file may be redistributed under the terms of the GNU Library |
14 | | * General Public License, version 2. |
15 | | * %End-Header% |
16 | | */ |
17 | | |
18 | | #if !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) |
19 | | #define _XOPEN_SOURCE 600 |
20 | | #define _DARWIN_C_SOURCE |
21 | | #ifndef _LARGEFILE_SOURCE |
22 | | #define _LARGEFILE_SOURCE |
23 | | #endif |
24 | | #ifndef _LARGEFILE64_SOURCE |
25 | | #define _LARGEFILE64_SOURCE |
26 | | #endif |
27 | | #ifndef _GNU_SOURCE |
28 | | #define _GNU_SOURCE |
29 | | #endif |
30 | | #endif |
31 | | |
32 | | #include "config.h" |
33 | | #include <stdio.h> |
34 | | #include <string.h> |
35 | | #if HAVE_UNISTD_H |
36 | | #include <unistd.h> |
37 | | #endif |
38 | | #if HAVE_ERRNO_H |
39 | | #include <errno.h> |
40 | | #endif |
41 | | #include <fcntl.h> |
42 | | #include <time.h> |
43 | | #ifdef __linux__ |
44 | | #include <sys/utsname.h> |
45 | | #endif |
46 | | #if HAVE_SYS_TYPES_H |
47 | | #include <sys/types.h> |
48 | | #endif |
49 | | #ifdef HAVE_SYS_IOCTL_H |
50 | | #include <sys/ioctl.h> |
51 | | #endif |
52 | | #ifdef HAVE_SYS_MOUNT_H |
53 | | #include <sys/mount.h> |
54 | | #endif |
55 | | #if HAVE_SYS_STAT_H |
56 | | #include <sys/stat.h> |
57 | | #endif |
58 | | #if HAVE_SYS_RESOURCE_H |
59 | | #include <sys/resource.h> |
60 | | #endif |
61 | | #if HAVE_LINUX_FALLOC_H |
62 | | #include <linux/falloc.h> |
63 | | #endif |
64 | | #ifdef HAVE_PTHREAD |
65 | | #include <pthread.h> |
66 | | #endif |
67 | | |
68 | | #if defined(__linux__) && defined(_IO) && !defined(BLKROGET) |
69 | | #define BLKROGET _IO(0x12, 94) /* Get read-only status (0 = read_write). */ |
70 | | #endif |
71 | | |
72 | | #undef ALIGN_DEBUG |
73 | | |
74 | | #include "ext2_fs.h" |
75 | | #include "ext2fs.h" |
76 | | #include "ext2fsP.h" |
77 | | |
78 | | /* |
79 | | * For checking structure magic numbers... |
80 | | */ |
81 | | |
/*
 * Bail out of the enclosing function, returning `code`, when the
 * object's magic field does not hold `code`.  The expansion is a bare
 * `if` statement, so the caller supplies the trailing semicolon.
 */
#define EXT2_CHECK_MAGIC(obj, code) \
	if ((obj)->magic != (code)) return (code)
84 | | |
/* One slot of the per-channel block cache. */
struct unix_cache {
	char			*buf;		/* block contents */
	unsigned long long	block;		/* block number held in buf */
	int			access_time;	/* channel access counter at last use */
	unsigned int		dirty:1;	/* buf still has to be written out */
	unsigned int		in_use:1;	/* slot holds a block */
	unsigned int		write_err:1;	/* last write-back of this slot failed */
};
93 | | |
/* Number of slots in the block cache. */
#define CACHE_SIZE		8
/* Must be smaller than CACHE_SIZE */
#define WRITE_DIRECT_SIZE	4
/* Should be smaller than CACHE_SIZE */
#define READ_DIRECT_SIZE	4
97 | | |
98 | | struct unix_private_data { |
99 | | int magic; |
100 | | int dev; |
101 | | int flags; |
102 | | int align; |
103 | | int access_time; |
104 | | ext2_loff_t offset; |
105 | | struct unix_cache cache[CACHE_SIZE]; |
106 | | void *bounce; |
107 | | struct struct_io_stats io_stats; |
108 | | #ifdef HAVE_PTHREAD |
109 | | pthread_mutex_t cache_mutex; |
110 | | pthread_mutex_t bounce_mutex; |
111 | | pthread_mutex_t stats_mutex; |
112 | | #endif |
113 | | }; |
114 | | |
/*
 * True when `n` (a pointer or an integer) is a multiple of `align`,
 * which must be a power of two.  Both arguments are parenthesized so
 * that expressions such as `ptr + 1` are evaluated before the cast to
 * uintptr_t rather than having the cast bind to the first operand only.
 */
#define IS_ALIGNED(n, align) ((((uintptr_t) (n)) & \
			       ((uintptr_t) ((align)-1))) == 0)
117 | | |
/* Names the per-channel mutex a locking helper should act on. */
typedef enum lock_kind {
	CACHE_MTX,
	BOUNCE_MTX,
	STATS_MTX
} kind_t;
121 | | |
#ifdef HAVE_PTHREAD
/*
 * Map a lock kind to the matching mutex inside `data`.  Returns NULL
 * when the channel was not opened with IO_FLAG_THREADS, in which case
 * callers do no locking at all.
 */
static inline pthread_mutex_t *get_mutex(struct unix_private_data *data,
					 kind_t kind)
{
	if (!(data->flags & IO_FLAG_THREADS))
		return NULL;

	switch (kind) {
	case CACHE_MTX:
		return &data->cache_mutex;
	case BOUNCE_MTX:
		return &data->bounce_mutex;
	case STATS_MTX:
		return &data->stats_mutex;
	}
	return NULL;
}
#endif
139 | | |
140 | | static inline void mutex_lock(struct unix_private_data *data, kind_t kind) |
141 | 27.7k | { |
142 | 27.7k | #ifdef HAVE_PTHREAD |
143 | 27.7k | pthread_mutex_t *mtx = get_mutex(data,kind); |
144 | | |
145 | 27.7k | if (mtx) |
146 | 0 | pthread_mutex_lock(mtx); |
147 | 27.7k | #endif |
148 | 27.7k | } |
149 | | |
150 | | static inline void mutex_unlock(struct unix_private_data *data, kind_t kind) |
151 | 27.7k | { |
152 | 27.7k | #ifdef HAVE_PTHREAD |
153 | 27.7k | pthread_mutex_t *mtx = get_mutex(data,kind); |
154 | | |
155 | 27.7k | if (mtx) |
156 | 0 | pthread_mutex_unlock(mtx); |
157 | 27.7k | #endif |
158 | 27.7k | } |
159 | | |
160 | | static errcode_t unix_get_stats(io_channel channel, io_stats *stats) |
161 | 427 | { |
162 | 427 | errcode_t retval = 0; |
163 | | |
164 | 427 | struct unix_private_data *data; |
165 | | |
166 | 427 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); |
167 | 427 | data = (struct unix_private_data *) channel->private_data; |
168 | 427 | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); |
169 | | |
170 | 427 | if (stats) { |
171 | 427 | mutex_lock(data, STATS_MTX); |
172 | 427 | *stats = &data->io_stats; |
173 | 427 | mutex_unlock(data, STATS_MTX); |
174 | 427 | } |
175 | | |
176 | 427 | return retval; |
177 | 427 | } |
178 | | |
179 | | /* |
180 | | * Here are the raw I/O functions |
181 | | */ |
182 | | static errcode_t raw_read_blk(io_channel channel, |
183 | | struct unix_private_data *data, |
184 | | unsigned long long block, |
185 | | int count, void *bufv) |
186 | 5.97k | { |
187 | 5.97k | errcode_t retval; |
188 | 5.97k | ssize_t size; |
189 | 5.97k | ext2_loff_t location; |
190 | 5.97k | int actual = 0; |
191 | 5.97k | unsigned char *buf = bufv; |
192 | 5.97k | ssize_t really_read = 0; |
193 | 5.97k | unsigned long long aligned_blk; |
194 | 5.97k | int align_size, offset; |
195 | | |
196 | 5.97k | size = (count < 0) ? -count : (ext2_loff_t) count * channel->block_size; |
197 | 5.97k | mutex_lock(data, STATS_MTX); |
198 | 5.97k | data->io_stats.bytes_read += size; |
199 | 5.97k | mutex_unlock(data, STATS_MTX); |
200 | 5.97k | location = ((ext2_loff_t) block * channel->block_size) + data->offset; |
201 | | |
202 | 5.97k | if (data->flags & IO_FLAG_FORCE_BOUNCE) |
203 | 0 | goto bounce_read; |
204 | | |
205 | 5.97k | #ifdef HAVE_PREAD64 |
206 | | /* Try an aligned pread */ |
207 | 5.97k | if ((channel->align == 0) || |
208 | 5.97k | (IS_ALIGNED(buf, channel->align) && |
209 | 0 | IS_ALIGNED(location, channel->align) && |
210 | 5.97k | IS_ALIGNED(size, channel->align))) { |
211 | 5.97k | actual = pread64(data->dev, buf, size, location); |
212 | 5.97k | if (actual == size) |
213 | 5.76k | return 0; |
214 | 211 | actual = 0; |
215 | 211 | } |
216 | | #elif HAVE_PREAD |
217 | | /* Try an aligned pread */ |
218 | | if ((sizeof(off_t) >= sizeof(ext2_loff_t)) && |
219 | | ((channel->align == 0) || |
220 | | (IS_ALIGNED(buf, channel->align) && |
221 | | IS_ALIGNED(location, channel->align) && |
222 | | IS_ALIGNED(size, channel->align)))) { |
223 | | actual = pread(data->dev, buf, size, location); |
224 | | if (actual == size) |
225 | | return 0; |
226 | | actual = 0; |
227 | | } |
228 | | #endif /* HAVE_PREAD */ |
229 | | |
230 | 211 | if ((channel->align == 0) || |
231 | 211 | (IS_ALIGNED(buf, channel->align) && |
232 | 0 | IS_ALIGNED(location, channel->align) && |
233 | 211 | IS_ALIGNED(size, channel->align))) { |
234 | 211 | mutex_lock(data, BOUNCE_MTX); |
235 | 211 | if (ext2fs_llseek(data->dev, location, SEEK_SET) < 0) { |
236 | 13 | retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; |
237 | 13 | goto error_unlock; |
238 | 13 | } |
239 | 198 | actual = read(data->dev, buf, size); |
240 | 198 | if (actual != size) { |
241 | 198 | short_read: |
242 | 198 | if (actual < 0) { |
243 | 0 | retval = errno; |
244 | 0 | actual = 0; |
245 | 0 | } else |
246 | 198 | retval = EXT2_ET_SHORT_READ; |
247 | 198 | goto error_unlock; |
248 | 198 | } |
249 | 0 | goto success_unlock; |
250 | 198 | } |
251 | | |
252 | | #ifdef ALIGN_DEBUG |
253 | | printf("raw_read_blk: O_DIRECT fallback: %p %lu\n", buf, |
254 | | (unsigned long) size); |
255 | | #endif |
256 | | |
257 | | /* |
258 | | * The buffer or size which we're trying to read isn't aligned |
259 | | * to the O_DIRECT rules, so we need to do this the hard way... |
260 | | */ |
261 | 0 | bounce_read: |
262 | 0 | if (channel->align == 0) |
263 | 0 | channel->align = 1; |
264 | 0 | if ((channel->block_size > channel->align) && |
265 | 0 | (channel->block_size % channel->align) == 0) |
266 | 0 | align_size = channel->block_size; |
267 | 0 | else |
268 | 0 | align_size = channel->align; |
269 | 0 | aligned_blk = location / align_size; |
270 | 0 | offset = location % align_size; |
271 | |
|
272 | 0 | mutex_lock(data, BOUNCE_MTX); |
273 | 0 | if (ext2fs_llseek(data->dev, aligned_blk * align_size, SEEK_SET) < 0) { |
274 | 0 | retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; |
275 | 0 | goto error_unlock; |
276 | 0 | } |
277 | 0 | while (size > 0) { |
278 | 0 | actual = read(data->dev, data->bounce, align_size); |
279 | 0 | if (actual != align_size) { |
280 | 0 | actual = really_read; |
281 | 0 | buf -= really_read; |
282 | 0 | size += really_read; |
283 | 0 | goto short_read; |
284 | 0 | } |
285 | 0 | if ((actual + offset) > align_size) |
286 | 0 | actual = align_size - offset; |
287 | 0 | if (actual > size) |
288 | 0 | actual = size; |
289 | 0 | memcpy(buf, (char *)data->bounce + offset, actual); |
290 | |
|
291 | 0 | really_read += actual; |
292 | 0 | size -= actual; |
293 | 0 | buf += actual; |
294 | 0 | offset = 0; |
295 | 0 | aligned_blk++; |
296 | 0 | } |
297 | 0 | success_unlock: |
298 | 0 | mutex_unlock(data, BOUNCE_MTX); |
299 | 0 | return 0; |
300 | | |
301 | 211 | error_unlock: |
302 | 211 | mutex_unlock(data, BOUNCE_MTX); |
303 | 211 | if (actual >= 0 && actual < size) |
304 | 211 | memset((char *) buf+actual, 0, size-actual); |
305 | 211 | if (channel->read_error) |
306 | 0 | retval = (channel->read_error)(channel, block, count, buf, |
307 | 0 | size, actual, retval); |
308 | 211 | return retval; |
309 | 0 | } |
310 | | |
311 | 0 | #define RAW_WRITE_NO_HANDLER 1 |
312 | | |
313 | | static errcode_t raw_write_blk(io_channel channel, |
314 | | struct unix_private_data *data, |
315 | | unsigned long long block, |
316 | | int count, const void *bufv, |
317 | | int flags) |
318 | 0 | { |
319 | 0 | ssize_t size; |
320 | 0 | ext2_loff_t location; |
321 | 0 | int actual = 0; |
322 | 0 | errcode_t retval; |
323 | 0 | const unsigned char *buf = bufv; |
324 | 0 | unsigned long long aligned_blk; |
325 | 0 | int align_size, offset; |
326 | |
|
327 | 0 | if (count == 1) |
328 | 0 | size = channel->block_size; |
329 | 0 | else { |
330 | 0 | if (count < 0) |
331 | 0 | size = -count; |
332 | 0 | else |
333 | 0 | size = (ext2_loff_t) count * channel->block_size; |
334 | 0 | } |
335 | 0 | mutex_lock(data, STATS_MTX); |
336 | 0 | data->io_stats.bytes_written += size; |
337 | 0 | mutex_unlock(data, STATS_MTX); |
338 | |
|
339 | 0 | location = ((ext2_loff_t) block * channel->block_size) + data->offset; |
340 | |
|
341 | 0 | if (data->flags & IO_FLAG_FORCE_BOUNCE) |
342 | 0 | goto bounce_write; |
343 | | |
344 | 0 | #ifdef HAVE_PWRITE64 |
345 | | /* Try an aligned pwrite */ |
346 | 0 | if ((channel->align == 0) || |
347 | 0 | (IS_ALIGNED(buf, channel->align) && |
348 | 0 | IS_ALIGNED(location, channel->align) && |
349 | 0 | IS_ALIGNED(size, channel->align))) { |
350 | 0 | actual = pwrite64(data->dev, buf, size, location); |
351 | 0 | if (actual == size) |
352 | 0 | return 0; |
353 | 0 | } |
354 | | #elif HAVE_PWRITE |
355 | | /* Try an aligned pwrite */ |
356 | | if ((sizeof(off_t) >= sizeof(ext2_loff_t)) && |
357 | | ((channel->align == 0) || |
358 | | (IS_ALIGNED(buf, channel->align) && |
359 | | IS_ALIGNED(location, channel->align) && |
360 | | IS_ALIGNED(size, channel->align)))) { |
361 | | actual = pwrite(data->dev, buf, size, location); |
362 | | if (actual == size) |
363 | | return 0; |
364 | | } |
365 | | #endif /* HAVE_PWRITE */ |
366 | | |
367 | 0 | if ((channel->align == 0) || |
368 | 0 | (IS_ALIGNED(buf, channel->align) && |
369 | 0 | IS_ALIGNED(location, channel->align) && |
370 | 0 | IS_ALIGNED(size, channel->align))) { |
371 | 0 | mutex_lock(data, BOUNCE_MTX); |
372 | 0 | if (ext2fs_llseek(data->dev, location, SEEK_SET) < 0) { |
373 | 0 | retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; |
374 | 0 | goto error_unlock; |
375 | 0 | } |
376 | 0 | actual = write(data->dev, buf, size); |
377 | 0 | mutex_unlock(data, BOUNCE_MTX); |
378 | 0 | if (actual < 0) { |
379 | 0 | retval = errno; |
380 | 0 | goto error_out; |
381 | 0 | } |
382 | 0 | if (actual != size) { |
383 | 0 | short_write: |
384 | 0 | retval = EXT2_ET_SHORT_WRITE; |
385 | 0 | goto error_out; |
386 | 0 | } |
387 | 0 | return 0; |
388 | 0 | } |
389 | | |
390 | | #ifdef ALIGN_DEBUG |
391 | | printf("raw_write_blk: O_DIRECT fallback: %p %lu\n", buf, |
392 | | (unsigned long) size); |
393 | | #endif |
394 | | /* |
395 | | * The buffer or size which we're trying to write isn't aligned |
396 | | * to the O_DIRECT rules, so we need to do this the hard way... |
397 | | */ |
398 | 0 | bounce_write: |
399 | 0 | if (channel->align == 0) |
400 | 0 | channel->align = 1; |
401 | 0 | if ((channel->block_size > channel->align) && |
402 | 0 | (channel->block_size % channel->align) == 0) |
403 | 0 | align_size = channel->block_size; |
404 | 0 | else |
405 | 0 | align_size = channel->align; |
406 | 0 | aligned_blk = location / align_size; |
407 | 0 | offset = location % align_size; |
408 | |
|
409 | 0 | while (size > 0) { |
410 | 0 | int actual_w; |
411 | |
|
412 | 0 | mutex_lock(data, BOUNCE_MTX); |
413 | 0 | if (size < align_size || offset) { |
414 | 0 | if (ext2fs_llseek(data->dev, aligned_blk * align_size, |
415 | 0 | SEEK_SET) < 0) { |
416 | 0 | retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; |
417 | 0 | goto error_unlock; |
418 | 0 | } |
419 | 0 | actual = read(data->dev, data->bounce, |
420 | 0 | align_size); |
421 | 0 | if (actual != align_size) { |
422 | 0 | if (actual < 0) { |
423 | 0 | retval = errno; |
424 | 0 | goto error_unlock; |
425 | 0 | } |
426 | 0 | memset((char *) data->bounce + actual, 0, |
427 | 0 | align_size - actual); |
428 | 0 | } |
429 | 0 | } |
430 | 0 | actual = size; |
431 | 0 | if ((actual + offset) > align_size) |
432 | 0 | actual = align_size - offset; |
433 | 0 | if (actual > size) |
434 | 0 | actual = size; |
435 | 0 | memcpy(((char *)data->bounce) + offset, buf, actual); |
436 | 0 | if (ext2fs_llseek(data->dev, aligned_blk * align_size, SEEK_SET) < 0) { |
437 | 0 | retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; |
438 | 0 | goto error_unlock; |
439 | 0 | } |
440 | 0 | actual_w = write(data->dev, data->bounce, align_size); |
441 | 0 | mutex_unlock(data, BOUNCE_MTX); |
442 | 0 | if (actual_w < 0) { |
443 | 0 | retval = errno; |
444 | 0 | goto error_out; |
445 | 0 | } |
446 | 0 | if (actual_w != align_size) |
447 | 0 | goto short_write; |
448 | 0 | size -= actual; |
449 | 0 | buf += actual; |
450 | 0 | location += actual; |
451 | 0 | aligned_blk++; |
452 | 0 | offset = 0; |
453 | 0 | } |
454 | 0 | return 0; |
455 | | |
456 | 0 | error_unlock: |
457 | 0 | mutex_unlock(data, BOUNCE_MTX); |
458 | 0 | error_out: |
459 | 0 | if (((flags & RAW_WRITE_NO_HANDLER) == 0) && channel->write_error) |
460 | 0 | retval = (channel->write_error)(channel, block, count, buf, |
461 | 0 | size, actual, retval); |
462 | 0 | return retval; |
463 | 0 | } |
464 | | |
465 | | |
466 | | /* |
467 | | * Here we implement the cache functions |
468 | | */ |
469 | | |
470 | | /* Allocate the cache buffers */ |
471 | | static errcode_t alloc_cache(io_channel channel, |
472 | | struct unix_private_data *data) |
473 | 1.27k | { |
474 | 1.27k | errcode_t retval; |
475 | 1.27k | struct unix_cache *cache; |
476 | 1.27k | int i; |
477 | | |
478 | 1.27k | data->access_time = 0; |
479 | 11.4k | for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { |
480 | 10.1k | cache->block = 0; |
481 | 10.1k | cache->access_time = 0; |
482 | 10.1k | cache->dirty = 0; |
483 | 10.1k | cache->in_use = 0; |
484 | 10.1k | if (cache->buf) |
485 | 0 | ext2fs_free_mem(&cache->buf); |
486 | 10.1k | retval = io_channel_alloc_buf(channel, 0, &cache->buf); |
487 | 10.1k | if (retval) |
488 | 0 | return retval; |
489 | 10.1k | } |
490 | 1.27k | if (channel->align || data->flags & IO_FLAG_FORCE_BOUNCE) { |
491 | 0 | if (data->bounce) |
492 | 0 | ext2fs_free_mem(&data->bounce); |
493 | 0 | retval = io_channel_alloc_buf(channel, 0, &data->bounce); |
494 | 0 | } |
495 | 1.27k | return retval; |
496 | 1.27k | } |
497 | | |
498 | | /* Free the cache buffers */ |
499 | | static void free_cache(struct unix_private_data *data) |
500 | 1.27k | { |
501 | 1.27k | struct unix_cache *cache; |
502 | 1.27k | int i; |
503 | | |
504 | 1.27k | data->access_time = 0; |
505 | 11.4k | for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { |
506 | 10.1k | cache->block = 0; |
507 | 10.1k | cache->access_time = 0; |
508 | 10.1k | cache->dirty = 0; |
509 | 10.1k | cache->in_use = 0; |
510 | 10.1k | if (cache->buf) |
511 | 10.1k | ext2fs_free_mem(&cache->buf); |
512 | 10.1k | } |
513 | 1.27k | if (data->bounce) |
514 | 0 | ext2fs_free_mem(&data->bounce); |
515 | 1.27k | } |
516 | | |
517 | | #ifndef NO_IO_CACHE |
518 | | /* |
519 | | * Try to find a block in the cache. If the block is not found, and |
520 | | * eldest is a non-zero pointer, then fill in eldest with the cache |
521 | | * entry to that should be reused. |
522 | | */ |
523 | | static struct unix_cache *find_cached_block(struct unix_private_data *data, |
524 | | unsigned long long block, |
525 | | struct unix_cache **eldest) |
526 | 20.5k | { |
527 | 20.5k | struct unix_cache *cache, *unused_cache, *oldest_cache; |
528 | 20.5k | int i; |
529 | | |
530 | 20.5k | unused_cache = oldest_cache = 0; |
531 | 115k | for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { |
532 | 107k | if (!cache->in_use) { |
533 | 23.1k | if (!unused_cache) |
534 | 3.69k | unused_cache = cache; |
535 | 23.1k | continue; |
536 | 23.1k | } |
537 | 84.4k | if (cache->block == block) { |
538 | 12.4k | cache->access_time = ++data->access_time; |
539 | 12.4k | return cache; |
540 | 12.4k | } |
541 | 72.0k | if (!oldest_cache || |
542 | 72.0k | (cache->access_time < oldest_cache->access_time)) |
543 | 28.3k | oldest_cache = cache; |
544 | 72.0k | } |
545 | 8.13k | if (eldest) |
546 | 3.96k | *eldest = (unused_cache) ? unused_cache : oldest_cache; |
547 | 8.13k | return 0; |
548 | 20.5k | } |
549 | | |
550 | | /* |
551 | | * Reuse a particular cache entry for another block. |
552 | | */ |
553 | | static errcode_t reuse_cache(io_channel channel, |
554 | | struct unix_private_data *data, struct unix_cache *cache, |
555 | | unsigned long long block) |
556 | 3.96k | { |
557 | 3.96k | if (cache->dirty && cache->in_use) { |
558 | 0 | errcode_t retval; |
559 | |
|
560 | 0 | retval = raw_write_blk(channel, data, cache->block, 1, |
561 | 0 | cache->buf, RAW_WRITE_NO_HANDLER); |
562 | 0 | if (retval) { |
563 | 0 | cache->write_err = 1; |
564 | 0 | return retval; |
565 | 0 | } |
566 | 0 | } |
567 | | |
568 | 3.96k | cache->in_use = 1; |
569 | 3.96k | cache->dirty = 0; |
570 | 3.96k | cache->write_err = 0; |
571 | 3.96k | cache->block = block; |
572 | 3.96k | cache->access_time = ++data->access_time; |
573 | 3.96k | return 0; |
574 | 3.96k | } |
575 | | |
576 | 0 | #define FLUSH_INVALIDATE 0x01 |
577 | 8.51k | #define FLUSH_NOLOCK 0x02 |
578 | | |
579 | | /* |
580 | | * Flush all of the blocks in the cache |
581 | | */ |
582 | | static errcode_t flush_cached_blocks(io_channel channel, |
583 | | struct unix_private_data *data, |
584 | | int flags) |
585 | 4.23k | { |
586 | 4.23k | struct unix_cache *cache; |
587 | 4.23k | errcode_t retval, retval2 = 0; |
588 | 4.23k | int i; |
589 | 4.23k | int errors_found = 0; |
590 | | |
591 | 4.23k | if ((flags & FLUSH_NOLOCK) == 0) |
592 | 4.19k | mutex_lock(data, CACHE_MTX); |
593 | 38.1k | for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { |
594 | 33.8k | if (!cache->in_use || !cache->dirty) |
595 | 33.8k | continue; |
596 | 0 | retval = raw_write_blk(channel, data, |
597 | 0 | cache->block, 1, cache->buf, |
598 | 0 | RAW_WRITE_NO_HANDLER); |
599 | 0 | if (retval) { |
600 | 0 | cache->write_err = 1; |
601 | 0 | errors_found = 1; |
602 | 0 | retval2 = retval; |
603 | 0 | } else { |
604 | 0 | cache->dirty = 0; |
605 | 0 | cache->write_err = 0; |
606 | 0 | if (flags & FLUSH_INVALIDATE) |
607 | 0 | cache->in_use = 0; |
608 | 0 | } |
609 | 0 | } |
610 | 4.23k | if ((flags & FLUSH_NOLOCK) == 0) |
611 | 4.19k | mutex_unlock(data, CACHE_MTX); |
612 | 4.23k | retry: |
613 | 4.23k | while (errors_found) { |
614 | 0 | if ((flags & FLUSH_NOLOCK) == 0) |
615 | 0 | mutex_lock(data, CACHE_MTX); |
616 | 0 | errors_found = 0; |
617 | 0 | for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { |
618 | 0 | if (!cache->in_use || !cache->write_err) |
619 | 0 | continue; |
620 | 0 | errors_found = 1; |
621 | 0 | if (cache->write_err && channel->write_error) { |
622 | 0 | char *err_buf = NULL; |
623 | 0 | unsigned long long err_block = cache->block; |
624 | |
|
625 | 0 | cache->dirty = 0; |
626 | 0 | cache->in_use = 0; |
627 | 0 | cache->write_err = 0; |
628 | 0 | if (io_channel_alloc_buf(channel, 0, |
629 | 0 | &err_buf)) |
630 | 0 | err_buf = NULL; |
631 | 0 | else |
632 | 0 | memcpy(err_buf, cache->buf, |
633 | 0 | channel->block_size); |
634 | 0 | mutex_unlock(data, CACHE_MTX); |
635 | 0 | (channel->write_error)(channel, err_block, |
636 | 0 | 1, err_buf, channel->block_size, -1, |
637 | 0 | retval2); |
638 | 0 | if (err_buf) |
639 | 0 | ext2fs_free_mem(&err_buf); |
640 | 0 | goto retry; |
641 | 0 | } else |
642 | 0 | cache->write_err = 0; |
643 | 0 | } |
644 | 0 | if ((flags & FLUSH_NOLOCK) == 0) |
645 | 0 | mutex_unlock(data, CACHE_MTX); |
646 | 0 | } |
647 | 4.23k | return retval2; |
648 | 4.23k | } |
649 | | #endif /* NO_IO_CACHE */ |
650 | | |
/* Supply the ioctl number on Linux when the system headers do not. */
#if defined(__linux__) && !defined(BLKDISCARDZEROES)
#define BLKDISCARDZEROES _IO(0x12,124)
#endif
656 | | |
/*
 * Open `pathname`, using open64() where the build provides it and
 * plain open() otherwise.  `mode` is forwarded only when it is non-zero.
 * Returns whatever the underlying open call returns.
 */
int ext2fs_open_file(const char *pathname, int flags, mode_t mode)
{
#if defined(HAVE_OPEN64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
	return mode ? open64(pathname, flags, mode) :
		      open64(pathname, flags);
#else
	return mode ? open(pathname, flags, mode) :
		      open(pathname, flags);
#endif
}
670 | | |
671 | | int ext2fs_stat(const char *path, ext2fs_struct_stat *buf) |
672 | 0 | { |
673 | 0 | #if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED) |
674 | 0 | return stat64(path, buf); |
675 | | #else |
676 | | return stat(path, buf); |
677 | | #endif |
678 | 0 | } |
679 | | |
680 | | int ext2fs_fstat(int fd, ext2fs_struct_stat *buf) |
681 | 1.23k | { |
682 | 1.23k | #if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED) |
683 | 1.23k | return fstat64(fd, buf); |
684 | | #else |
685 | | return fstat(fd, buf); |
686 | | #endif |
687 | 1.23k | } |
688 | | |
689 | | |
690 | | static errcode_t unix_open_channel(const char *name, int fd, |
691 | | int flags, io_channel *channel, |
692 | | io_manager io_mgr) |
693 | 1.23k | { |
694 | 1.23k | io_channel io = NULL; |
695 | 1.23k | struct unix_private_data *data = NULL; |
696 | 1.23k | errcode_t retval; |
697 | 1.23k | ext2fs_struct_stat st; |
698 | 1.23k | #ifdef __linux__ |
699 | 1.23k | struct utsname ut; |
700 | 1.23k | #endif |
701 | | |
702 | 1.23k | if (ext2fs_safe_getenv("UNIX_IO_FORCE_BOUNCE")) |
703 | 0 | flags |= IO_FLAG_FORCE_BOUNCE; |
704 | | |
705 | 1.23k | #ifdef __linux__ |
706 | | /* |
707 | | * We need to make sure any previous errors in the block |
708 | | * device are thrown away, sigh. |
709 | | */ |
710 | 1.23k | (void) fsync(fd); |
711 | 1.23k | #endif |
712 | | |
713 | 1.23k | retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io); |
714 | 1.23k | if (retval) |
715 | 0 | goto cleanup; |
716 | 1.23k | memset(io, 0, sizeof(struct struct_io_channel)); |
717 | 1.23k | io->magic = EXT2_ET_MAGIC_IO_CHANNEL; |
718 | 1.23k | retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data); |
719 | 1.23k | if (retval) |
720 | 0 | goto cleanup; |
721 | | |
722 | 1.23k | io->manager = io_mgr; |
723 | 1.23k | retval = ext2fs_get_mem(strlen(name)+1, &io->name); |
724 | 1.23k | if (retval) |
725 | 0 | goto cleanup; |
726 | | |
727 | 1.23k | strcpy(io->name, name); |
728 | 1.23k | io->private_data = data; |
729 | 1.23k | io->block_size = 1024; |
730 | 1.23k | io->read_error = 0; |
731 | 1.23k | io->write_error = 0; |
732 | 1.23k | io->refcount = 1; |
733 | 1.23k | io->flags = 0; |
734 | | |
735 | 1.23k | if (ext2fs_safe_getenv("UNIX_IO_NOZEROOUT")) |
736 | 0 | io->flags |= CHANNEL_FLAGS_NOZEROOUT; |
737 | | |
738 | 1.23k | memset(data, 0, sizeof(struct unix_private_data)); |
739 | 1.23k | data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL; |
740 | 1.23k | data->io_stats.num_fields = 2; |
741 | 1.23k | data->flags = flags; |
742 | 1.23k | data->dev = fd; |
743 | | |
744 | 1.23k | #if defined(O_DIRECT) |
745 | 1.23k | if (flags & IO_FLAG_DIRECT_IO) |
746 | 0 | io->align = ext2fs_get_dio_alignment(data->dev); |
747 | | #elif defined(F_NOCACHE) |
748 | | if (flags & IO_FLAG_DIRECT_IO) |
749 | | io->align = 4096; |
750 | | #endif |
751 | | |
752 | | /* |
753 | | * If the device is really a block device, then set the |
754 | | * appropriate flag, otherwise we can set DISCARD_ZEROES flag |
755 | | * because we are going to use punch hole instead of discard |
756 | | * and if it succeed, subsequent read from sparse area returns |
757 | | * zero. |
758 | | */ |
759 | 1.23k | if (ext2fs_fstat(data->dev, &st) == 0) { |
760 | 1.23k | if (ext2fsP_is_disk_device(st.st_mode)) { |
761 | 0 | #ifdef BLKDISCARDZEROES |
762 | 0 | int zeroes = 0; |
763 | |
|
764 | 0 | if (ioctl(data->dev, BLKDISCARDZEROES, &zeroes) == 0 && |
765 | 0 | zeroes) |
766 | 0 | io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES; |
767 | 0 | #endif |
768 | 0 | io->flags |= CHANNEL_FLAGS_BLOCK_DEVICE; |
769 | 1.23k | } else { |
770 | 1.23k | io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES; |
771 | 1.23k | } |
772 | 1.23k | } |
773 | | |
774 | | #if defined(__CYGWIN__) |
775 | | /* |
776 | | * Some operating systems require that the buffers be aligned, |
777 | | * regardless of O_DIRECT |
778 | | */ |
779 | | if (!io->align) |
780 | | io->align = 512; |
781 | | #endif |
782 | | |
783 | | #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) |
784 | | if (io->flags & CHANNEL_FLAGS_BLOCK_DEVICE) { |
785 | | int dio_align = ext2fs_get_dio_alignment(fd); |
786 | | |
787 | | if (io->align < dio_align) |
788 | | io->align = dio_align; |
789 | | } |
790 | | #endif |
791 | | |
792 | 1.23k | if ((retval = alloc_cache(io, data))) |
793 | 0 | goto cleanup; |
794 | | |
795 | 1.23k | #ifdef BLKROGET |
796 | 1.23k | if (flags & IO_FLAG_RW) { |
797 | 0 | int error; |
798 | 0 | int readonly = 0; |
799 | | |
800 | | /* Is the block device actually writable? */ |
801 | 0 | error = ioctl(data->dev, BLKROGET, &readonly); |
802 | 0 | if (!error && readonly) { |
803 | 0 | retval = EPERM; |
804 | 0 | goto cleanup; |
805 | 0 | } |
806 | 0 | } |
807 | 1.23k | #endif |
808 | | |
809 | 1.23k | #ifdef __linux__ |
810 | 1.23k | #undef RLIM_INFINITY |
811 | | #if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4))) |
812 | | #define RLIM_INFINITY ((unsigned long)(~0UL>>1)) |
813 | | #else |
814 | 1.23k | #define RLIM_INFINITY (~0UL) |
815 | 1.23k | #endif |
816 | | /* |
817 | | * Work around a bug in 2.4.10-2.4.18 kernels where writes to |
818 | | * block devices are wrongly getting hit by the filesize |
819 | | * limit. This workaround isn't perfect, since it won't work |
820 | | * if glibc wasn't built against 2.2 header files. (Sigh.) |
821 | | * |
822 | | */ |
823 | 1.23k | if ((flags & IO_FLAG_RW) && |
824 | 1.23k | (uname(&ut) == 0) && |
825 | 1.23k | ((ut.release[0] == '2') && (ut.release[1] == '.') && |
826 | 0 | (ut.release[2] == '4') && (ut.release[3] == '.') && |
827 | 0 | (ut.release[4] == '1') && (ut.release[5] >= '0') && |
828 | 0 | (ut.release[5] < '8')) && |
829 | 1.23k | (ext2fs_fstat(data->dev, &st) == 0) && |
830 | 1.23k | (ext2fsP_is_disk_device(st.st_mode))) { |
831 | 0 | struct rlimit rlim; |
832 | |
|
833 | 0 | rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY; |
834 | 0 | setrlimit(RLIMIT_FSIZE, &rlim); |
835 | 0 | getrlimit(RLIMIT_FSIZE, &rlim); |
836 | 0 | if (((unsigned long) rlim.rlim_cur) < |
837 | 0 | ((unsigned long) rlim.rlim_max)) { |
838 | 0 | rlim.rlim_cur = rlim.rlim_max; |
839 | 0 | setrlimit(RLIMIT_FSIZE, &rlim); |
840 | 0 | } |
841 | 0 | } |
842 | 1.23k | #endif |
843 | 1.23k | #ifdef HAVE_PTHREAD |
844 | 1.23k | if (flags & IO_FLAG_THREADS) { |
845 | 0 | io->flags |= CHANNEL_FLAGS_THREADS; |
846 | 0 | retval = pthread_mutex_init(&data->cache_mutex, NULL); |
847 | 0 | if (retval) |
848 | 0 | goto cleanup; |
849 | 0 | retval = pthread_mutex_init(&data->bounce_mutex, NULL); |
850 | 0 | if (retval) { |
851 | 0 | pthread_mutex_destroy(&data->cache_mutex); |
852 | 0 | goto cleanup; |
853 | 0 | } |
854 | 0 | retval = pthread_mutex_init(&data->stats_mutex, NULL); |
855 | 0 | if (retval) { |
856 | 0 | pthread_mutex_destroy(&data->cache_mutex); |
857 | 0 | pthread_mutex_destroy(&data->bounce_mutex); |
858 | 0 | goto cleanup; |
859 | 0 | } |
860 | 0 | } |
861 | 1.23k | #endif |
862 | 1.23k | *channel = io; |
863 | 1.23k | return 0; |
864 | | |
865 | 0 | cleanup: |
866 | 0 | if (data) { |
867 | 0 | if (data->dev >= 0) |
868 | 0 | close(data->dev); |
869 | 0 | free_cache(data); |
870 | 0 | ext2fs_free_mem(&data); |
871 | 0 | } |
872 | 0 | if (io) { |
873 | 0 | if (io->name) { |
874 | 0 | ext2fs_free_mem(&io->name); |
875 | 0 | } |
876 | 0 | ext2fs_free_mem(&io); |
877 | 0 | } |
878 | 0 | return retval; |
879 | 1.23k | } |
880 | | |
881 | | static errcode_t unixfd_open(const char *str_fd, int flags, |
882 | | io_channel *channel) |
883 | 0 | { |
884 | 0 | int fd; |
885 | 0 | int fd_flags; |
886 | |
|
887 | 0 | fd = atoi(str_fd); |
888 | 0 | #if defined(HAVE_FCNTL) |
889 | 0 | fd_flags = fcntl(fd, F_GETFD); |
890 | 0 | if (fd_flags == -1) |
891 | 0 | return EBADF; |
892 | | |
893 | 0 | flags = 0; |
894 | 0 | if (fd_flags & O_RDWR) |
895 | 0 | flags |= IO_FLAG_RW; |
896 | 0 | if (fd_flags & O_EXCL) |
897 | 0 | flags |= IO_FLAG_EXCLUSIVE; |
898 | 0 | #if defined(O_DIRECT) |
899 | 0 | if (fd_flags & O_DIRECT) |
900 | 0 | flags |= IO_FLAG_DIRECT_IO; |
901 | 0 | #endif |
902 | 0 | #endif /* HAVE_FCNTL */ |
903 | |
|
904 | 0 | return unix_open_channel(str_fd, fd, flags, channel, unixfd_io_manager); |
905 | 0 | } |
906 | | |
907 | | static errcode_t unix_open(const char *name, int flags, |
908 | | io_channel *channel) |
909 | 1.23k | { |
910 | 1.23k | int fd = -1; |
911 | 1.23k | int open_flags; |
912 | | |
913 | 1.23k | if (name == 0) |
914 | 0 | return EXT2_ET_BAD_DEVICE_NAME; |
915 | | |
916 | 1.23k | open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY; |
917 | 1.23k | if (flags & IO_FLAG_EXCLUSIVE) |
918 | 0 | open_flags |= O_EXCL; |
919 | 1.23k | #if defined(O_DIRECT) |
920 | 1.23k | if (flags & IO_FLAG_DIRECT_IO) |
921 | 0 | open_flags |= O_DIRECT; |
922 | 1.23k | #endif |
923 | 1.23k | fd = ext2fs_open_file(name, open_flags, 0); |
924 | 1.23k | if (fd < 0) |
925 | 0 | return errno; |
926 | | #if defined(F_NOCACHE) && !defined(IO_DIRECT) |
927 | | if (flags & IO_FLAG_DIRECT_IO) { |
928 | | if (fcntl(fd, F_NOCACHE, 1) < 0) |
929 | | return errno; |
930 | | } |
931 | | #endif |
932 | 1.23k | return unix_open_channel(name, fd, flags, channel, unix_io_manager); |
933 | 1.23k | } |
934 | | |
935 | | static errcode_t unix_close(io_channel channel) |
936 | 1.23k | { |
937 | 1.23k | struct unix_private_data *data; |
938 | 1.23k | errcode_t retval = 0; |
939 | | |
940 | 1.23k | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); |
941 | 1.23k | data = (struct unix_private_data *) channel->private_data; |
942 | 1.23k | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); |
943 | | |
944 | 1.23k | if (--channel->refcount > 0) |
945 | 0 | return 0; |
946 | | |
947 | 1.23k | #ifndef NO_IO_CACHE |
948 | 1.23k | retval = flush_cached_blocks(channel, data, 0); |
949 | 1.23k | #endif |
950 | | |
951 | 1.23k | if (close(data->dev) < 0) |
952 | 0 | retval = errno; |
953 | 1.23k | free_cache(data); |
954 | 1.23k | #ifdef HAVE_PTHREAD |
955 | 1.23k | if (data->flags & IO_FLAG_THREADS) { |
956 | 0 | pthread_mutex_destroy(&data->cache_mutex); |
957 | 0 | pthread_mutex_destroy(&data->bounce_mutex); |
958 | 0 | pthread_mutex_destroy(&data->stats_mutex); |
959 | 0 | } |
960 | 1.23k | #endif |
961 | | |
962 | 1.23k | ext2fs_free_mem(&channel->private_data); |
963 | 1.23k | if (channel->name) |
964 | 1.23k | ext2fs_free_mem(&channel->name); |
965 | 1.23k | ext2fs_free_mem(&channel); |
966 | 1.23k | return retval; |
967 | 1.23k | } |
968 | | |
969 | | static errcode_t unix_set_blksize(io_channel channel, int blksize) |
970 | 2.18k | { |
971 | 2.18k | struct unix_private_data *data; |
972 | 2.18k | errcode_t retval = 0; |
973 | | |
974 | 2.18k | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); |
975 | 2.18k | data = (struct unix_private_data *) channel->private_data; |
976 | 2.18k | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); |
977 | | |
978 | 2.18k | if (channel->block_size != blksize) { |
979 | 38 | mutex_lock(data, CACHE_MTX); |
980 | 38 | mutex_lock(data, BOUNCE_MTX); |
981 | 38 | #ifndef NO_IO_CACHE |
982 | 38 | if ((retval = flush_cached_blocks(channel, data, FLUSH_NOLOCK))){ |
983 | 0 | mutex_unlock(data, BOUNCE_MTX); |
984 | 0 | mutex_unlock(data, CACHE_MTX); |
985 | 0 | return retval; |
986 | 0 | } |
987 | 38 | #endif |
988 | | |
989 | 38 | channel->block_size = blksize; |
990 | 38 | free_cache(data); |
991 | 38 | retval = alloc_cache(channel, data); |
992 | 38 | mutex_unlock(data, BOUNCE_MTX); |
993 | 38 | mutex_unlock(data, CACHE_MTX); |
994 | 38 | } |
995 | 2.18k | return retval; |
996 | 2.18k | } |
997 | | |
998 | | static errcode_t unix_read_blk64(io_channel channel, unsigned long long block, |
999 | | int count, void *buf) |
1000 | 16.9k | { |
1001 | 16.9k | struct unix_private_data *data; |
1002 | 16.9k | struct unix_cache *cache; |
1003 | 16.9k | errcode_t retval; |
1004 | 16.9k | char *cp; |
1005 | 16.9k | int i, j; |
1006 | | |
1007 | 16.9k | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); |
1008 | 16.9k | data = (struct unix_private_data *) channel->private_data; |
1009 | 16.9k | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); |
1010 | | |
1011 | | #ifdef NO_IO_CACHE |
1012 | | return raw_read_blk(channel, data, block, count, buf); |
1013 | | #else |
1014 | 16.9k | if (data->flags & IO_FLAG_NOCACHE) |
1015 | 0 | return raw_read_blk(channel, data, block, count, buf); |
1016 | | /* |
1017 | | * If we're doing an odd-sized read or a very large read, |
1018 | | * flush out the cache and then do a direct read. |
1019 | | */ |
1020 | 16.9k | if (count < 0 || count > WRITE_DIRECT_SIZE) { |
1021 | 2.96k | if ((retval = flush_cached_blocks(channel, data, 0))) |
1022 | 0 | return retval; |
1023 | 2.96k | return raw_read_blk(channel, data, block, count, buf); |
1024 | 2.96k | } |
1025 | | |
1026 | 13.9k | cp = buf; |
1027 | 13.9k | mutex_lock(data, CACHE_MTX); |
1028 | 29.0k | while (count > 0) { |
1029 | | /* If it's in the cache, use it! */ |
1030 | 15.2k | if ((cache = find_cached_block(data, block, NULL))) { |
1031 | | #ifdef DEBUG |
1032 | | printf("Using cached block %lu\n", block); |
1033 | | #endif |
1034 | 12.2k | memcpy(cp, cache->buf, channel->block_size); |
1035 | 12.2k | count--; |
1036 | 12.2k | block++; |
1037 | 12.2k | cp += channel->block_size; |
1038 | 12.2k | continue; |
1039 | 12.2k | } |
1040 | | |
1041 | | /* |
1042 | | * Find the number of uncached blocks so we can do a |
1043 | | * single read request |
1044 | | */ |
1045 | 4.17k | for (i=1; i < count; i++) |
1046 | 1.35k | if (find_cached_block(data, block+i, NULL)) |
1047 | 197 | break; |
1048 | | #ifdef DEBUG |
1049 | | printf("Reading %d blocks starting at %lu\n", i, block); |
1050 | | #endif |
1051 | 3.01k | mutex_unlock(data, CACHE_MTX); |
1052 | 3.01k | if ((retval = raw_read_blk(channel, data, block, i, cp))) |
1053 | 167 | return retval; |
1054 | 2.84k | mutex_lock(data, CACHE_MTX); |
1055 | | |
1056 | | /* Save the results in the cache */ |
1057 | 6.80k | for (j=0; j < i; j++) { |
1058 | 3.96k | if (!find_cached_block(data, block, &cache)) { |
1059 | 3.96k | retval = reuse_cache(channel, data, |
1060 | 3.96k | cache, block); |
1061 | 3.96k | if (retval) |
1062 | 0 | goto call_write_handler; |
1063 | 3.96k | memcpy(cache->buf, cp, channel->block_size); |
1064 | 3.96k | } |
1065 | 3.96k | count--; |
1066 | 3.96k | block++; |
1067 | 3.96k | cp += channel->block_size; |
1068 | 3.96k | } |
1069 | 2.84k | } |
1070 | 13.8k | mutex_unlock(data, CACHE_MTX); |
1071 | 13.8k | return 0; |
1072 | | |
1073 | 0 | call_write_handler: |
1074 | 0 | if (cache->write_err && channel->write_error) { |
1075 | 0 | char *err_buf = NULL; |
1076 | 0 | unsigned long long err_block = cache->block; |
1077 | |
|
1078 | 0 | cache->dirty = 0; |
1079 | 0 | cache->in_use = 0; |
1080 | 0 | cache->write_err = 0; |
1081 | 0 | if (io_channel_alloc_buf(channel, 0, &err_buf)) |
1082 | 0 | err_buf = NULL; |
1083 | 0 | else |
1084 | 0 | memcpy(err_buf, cache->buf, channel->block_size); |
1085 | 0 | mutex_unlock(data, CACHE_MTX); |
1086 | 0 | (channel->write_error)(channel, err_block, 1, err_buf, |
1087 | 0 | channel->block_size, -1, |
1088 | 0 | retval); |
1089 | 0 | if (err_buf) |
1090 | 0 | ext2fs_free_mem(&err_buf); |
1091 | 0 | } else |
1092 | 0 | mutex_unlock(data, CACHE_MTX); |
1093 | 0 | return retval; |
1094 | 13.9k | #endif /* NO_IO_CACHE */ |
1095 | 13.9k | } |
1096 | | |
1097 | | static errcode_t unix_read_blk(io_channel channel, unsigned long block, |
1098 | | int count, void *buf) |
1099 | 1.87k | { |
1100 | 1.87k | return unix_read_blk64(channel, block, count, buf); |
1101 | 1.87k | } |
1102 | | |
1103 | | static errcode_t unix_write_blk64(io_channel channel, unsigned long long block, |
1104 | | int count, const void *buf) |
1105 | 0 | { |
1106 | 0 | struct unix_private_data *data; |
1107 | 0 | struct unix_cache *cache, *reuse; |
1108 | 0 | errcode_t retval = 0; |
1109 | 0 | const char *cp; |
1110 | 0 | int writethrough; |
1111 | |
|
1112 | 0 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); |
1113 | 0 | data = (struct unix_private_data *) channel->private_data; |
1114 | 0 | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); |
1115 | | |
1116 | | #ifdef NO_IO_CACHE |
1117 | | return raw_write_blk(channel, data, block, count, buf, 0); |
1118 | | #else |
1119 | 0 | if (data->flags & IO_FLAG_NOCACHE) |
1120 | 0 | return raw_write_blk(channel, data, block, count, buf, 0); |
1121 | | /* |
1122 | | * If we're doing an odd-sized write or a very large write, |
1123 | | * flush out the cache completely and then do a direct write. |
1124 | | */ |
1125 | 0 | if (count < 0 || count > WRITE_DIRECT_SIZE) { |
1126 | 0 | if ((retval = flush_cached_blocks(channel, data, |
1127 | 0 | FLUSH_INVALIDATE))) |
1128 | 0 | return retval; |
1129 | 0 | return raw_write_blk(channel, data, block, count, buf, 0); |
1130 | 0 | } |
1131 | | |
1132 | | /* |
1133 | | * For a moderate-sized multi-block write, first force a write |
1134 | | * if we're in write-through cache mode, and then fill the |
1135 | | * cache with the blocks. |
1136 | | */ |
1137 | 0 | writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH; |
1138 | 0 | if (writethrough) |
1139 | 0 | retval = raw_write_blk(channel, data, block, count, buf, 0); |
1140 | |
|
1141 | 0 | cp = buf; |
1142 | 0 | mutex_lock(data, CACHE_MTX); |
1143 | 0 | while (count > 0) { |
1144 | 0 | cache = find_cached_block(data, block, &reuse); |
1145 | 0 | if (!cache) { |
1146 | 0 | errcode_t err; |
1147 | |
|
1148 | 0 | cache = reuse; |
1149 | 0 | err = reuse_cache(channel, data, cache, block); |
1150 | 0 | if (err) |
1151 | 0 | goto call_write_handler; |
1152 | 0 | } |
1153 | 0 | if (cache->buf != cp) |
1154 | 0 | memcpy(cache->buf, cp, channel->block_size); |
1155 | 0 | cache->dirty = !writethrough; |
1156 | 0 | count--; |
1157 | 0 | block++; |
1158 | 0 | cp += channel->block_size; |
1159 | 0 | } |
1160 | 0 | mutex_unlock(data, CACHE_MTX); |
1161 | 0 | return retval; |
1162 | | |
1163 | 0 | call_write_handler: |
1164 | 0 | if (cache->write_err && channel->write_error) { |
1165 | 0 | char *err_buf = NULL; |
1166 | 0 | unsigned long long err_block = cache->block; |
1167 | |
|
1168 | 0 | cache->dirty = 0; |
1169 | 0 | cache->in_use = 0; |
1170 | 0 | cache->write_err = 0; |
1171 | 0 | if (io_channel_alloc_buf(channel, 0, &err_buf)) |
1172 | 0 | err_buf = NULL; |
1173 | 0 | else |
1174 | 0 | memcpy(err_buf, cache->buf, channel->block_size); |
1175 | 0 | mutex_unlock(data, CACHE_MTX); |
1176 | 0 | (channel->write_error)(channel, err_block, 1, err_buf, |
1177 | 0 | channel->block_size, -1, |
1178 | 0 | retval); |
1179 | 0 | if (err_buf) |
1180 | 0 | ext2fs_free_mem(&err_buf); |
1181 | 0 | } else |
1182 | 0 | mutex_unlock(data, CACHE_MTX); |
1183 | 0 | return retval; |
1184 | 0 | #endif /* NO_IO_CACHE */ |
1185 | 0 | } |
1186 | | |
1187 | | static errcode_t unix_cache_readahead(io_channel channel, |
1188 | | unsigned long long block, |
1189 | | unsigned long long count) |
1190 | 14.7M | { |
1191 | 14.7M | #ifdef POSIX_FADV_WILLNEED |
1192 | 14.7M | struct unix_private_data *data; |
1193 | | |
1194 | 14.7M | data = (struct unix_private_data *)channel->private_data; |
1195 | 14.7M | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); |
1196 | 14.7M | return posix_fadvise(data->dev, |
1197 | 14.7M | (ext2_loff_t)block * channel->block_size + data->offset, |
1198 | 14.7M | (ext2_loff_t)count * channel->block_size, |
1199 | 14.7M | POSIX_FADV_WILLNEED); |
1200 | | #else |
1201 | | return EXT2_ET_OP_NOT_SUPPORTED; |
1202 | | #endif |
1203 | 14.7M | } |
1204 | | |
1205 | | static errcode_t unix_write_blk(io_channel channel, unsigned long block, |
1206 | | int count, const void *buf) |
1207 | 0 | { |
1208 | 0 | return unix_write_blk64(channel, block, count, buf); |
1209 | 0 | } |
1210 | | |
1211 | | static errcode_t unix_write_byte(io_channel channel, unsigned long offset, |
1212 | | int size, const void *buf) |
1213 | 0 | { |
1214 | 0 | struct unix_private_data *data; |
1215 | 0 | errcode_t retval = 0; |
1216 | 0 | ssize_t actual; |
1217 | |
|
1218 | 0 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); |
1219 | 0 | data = (struct unix_private_data *) channel->private_data; |
1220 | 0 | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); |
1221 | | |
1222 | 0 | if (channel->align != 0) { |
1223 | | #ifdef ALIGN_DEBUG |
1224 | | printf("unix_write_byte: O_DIRECT fallback\n"); |
1225 | | #endif |
1226 | 0 | return EXT2_ET_UNIMPLEMENTED; |
1227 | 0 | } |
1228 | | |
1229 | 0 | #ifndef NO_IO_CACHE |
1230 | | /* |
1231 | | * Flush out the cache completely |
1232 | | */ |
1233 | 0 | if ((retval = flush_cached_blocks(channel, data, FLUSH_INVALIDATE))) |
1234 | 0 | return retval; |
1235 | 0 | #endif |
1236 | | |
1237 | 0 | if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0) |
1238 | 0 | return errno; |
1239 | | |
1240 | 0 | actual = write(data->dev, buf, size); |
1241 | 0 | if (actual < 0) |
1242 | 0 | return errno; |
1243 | 0 | if (actual != size) |
1244 | 0 | return EXT2_ET_SHORT_WRITE; |
1245 | | |
1246 | 0 | return 0; |
1247 | 0 | } |
1248 | | |
1249 | | /* |
1250 | | * Flush data buffers to disk. |
1251 | | */ |
1252 | | static errcode_t unix_flush(io_channel channel) |
1253 | 0 | { |
1254 | 0 | struct unix_private_data *data; |
1255 | 0 | errcode_t retval = 0; |
1256 | |
|
1257 | 0 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); |
1258 | 0 | data = (struct unix_private_data *) channel->private_data; |
1259 | 0 | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); |
1260 | | |
1261 | 0 | #ifndef NO_IO_CACHE |
1262 | 0 | retval = flush_cached_blocks(channel, data, 0); |
1263 | 0 | #endif |
1264 | 0 | #ifdef HAVE_FSYNC |
1265 | 0 | if (!retval && fsync(data->dev) != 0) |
1266 | 0 | return errno; |
1267 | 0 | #endif |
1268 | 0 | return retval; |
1269 | 0 | } |
1270 | | |
1271 | | static errcode_t unix_set_option(io_channel channel, const char *option, |
1272 | | const char *arg) |
1273 | 0 | { |
1274 | 0 | struct unix_private_data *data; |
1275 | 0 | unsigned long long tmp; |
1276 | 0 | errcode_t retval; |
1277 | 0 | char *end; |
1278 | |
|
1279 | 0 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); |
1280 | 0 | data = (struct unix_private_data *) channel->private_data; |
1281 | 0 | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); |
1282 | | |
1283 | 0 | if (!strcmp(option, "offset")) { |
1284 | 0 | if (!arg) |
1285 | 0 | return EXT2_ET_INVALID_ARGUMENT; |
1286 | | |
1287 | 0 | tmp = strtoull(arg, &end, 0); |
1288 | 0 | if (*end) |
1289 | 0 | return EXT2_ET_INVALID_ARGUMENT; |
1290 | 0 | data->offset = tmp; |
1291 | 0 | if (data->offset < 0) |
1292 | 0 | return EXT2_ET_INVALID_ARGUMENT; |
1293 | 0 | return 0; |
1294 | 0 | } |
1295 | 0 | if (!strcmp(option, "cache")) { |
1296 | 0 | if (!arg) |
1297 | 0 | return EXT2_ET_INVALID_ARGUMENT; |
1298 | 0 | if (!strcmp(arg, "on")) { |
1299 | 0 | data->flags &= ~IO_FLAG_NOCACHE; |
1300 | 0 | return 0; |
1301 | 0 | } |
1302 | 0 | if (!strcmp(arg, "off")) { |
1303 | 0 | retval = flush_cached_blocks(channel, data, 0); |
1304 | 0 | data->flags |= IO_FLAG_NOCACHE; |
1305 | 0 | return retval; |
1306 | 0 | } |
1307 | 0 | return EXT2_ET_INVALID_ARGUMENT; |
1308 | 0 | } |
1309 | 0 | return EXT2_ET_INVALID_ARGUMENT; |
1310 | 0 | } |
1311 | | |
1312 | | #if defined(__linux__) && !defined(BLKDISCARD) |
1313 | 0 | #define BLKDISCARD _IO(0x12,119) |
1314 | | #endif |
1315 | | |
1316 | | static errcode_t unix_discard(io_channel channel, unsigned long long block, |
1317 | | unsigned long long count) |
1318 | 0 | { |
1319 | 0 | struct unix_private_data *data; |
1320 | 0 | int ret = EOPNOTSUPP; |
1321 | |
|
1322 | 0 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); |
1323 | 0 | data = (struct unix_private_data *) channel->private_data; |
1324 | 0 | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); |
1325 | | |
1326 | 0 | if (channel->flags & CHANNEL_FLAGS_NODISCARD) |
1327 | 0 | goto unimplemented; |
1328 | | |
1329 | 0 | if (channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE) { |
1330 | 0 | #ifdef BLKDISCARD |
1331 | 0 | __u64 range[2]; |
1332 | |
|
1333 | 0 | range[0] = (__u64)(block) * channel->block_size + data->offset; |
1334 | 0 | range[1] = (__u64)(count) * channel->block_size; |
1335 | |
|
1336 | 0 | ret = ioctl(data->dev, BLKDISCARD, &range); |
1337 | | #else |
1338 | | goto unimplemented; |
1339 | | #endif |
1340 | 0 | } else { |
1341 | 0 | #if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) |
1342 | | /* |
1343 | | * If we are not on block device, try to use punch hole |
1344 | | * to reclaim free space. |
1345 | | */ |
1346 | 0 | ret = fallocate(data->dev, |
1347 | 0 | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, |
1348 | 0 | (off_t)(block) * channel->block_size + data->offset, |
1349 | 0 | (off_t)(count) * channel->block_size); |
1350 | | #else |
1351 | | goto unimplemented; |
1352 | | #endif |
1353 | 0 | } |
1354 | 0 | if (ret < 0) { |
1355 | 0 | if (errno == EOPNOTSUPP) { |
1356 | 0 | channel->flags |= CHANNEL_FLAGS_NODISCARD; |
1357 | 0 | goto unimplemented; |
1358 | 0 | } |
1359 | 0 | return errno; |
1360 | 0 | } |
1361 | 0 | return 0; |
1362 | 0 | unimplemented: |
1363 | 0 | return EXT2_ET_UNIMPLEMENTED; |
1364 | 0 | } |
1365 | | |
/*
 * Zero a byte range of fd using fallocate().
 *
 * ZERO_RANGE is attempted first (when known) because, unlike
 * PUNCH_HOLE, it doesn't unmap preallocated blocks; PUNCH_HOLE is the
 * fallback.  fallocate is preferred because it always invalidates page
 * cache, and libext2fs requires that reads after ZERO_RANGE return
 * zeroes.
 *
 * Returns 0 as soon as one method succeeds.  Otherwise errno is set to
 * EOPNOTSUPP and the last fallocate() result is returned (-1 if no
 * method was compiled in).
 */
static int __unix_zeroout(int fd, off_t offset, off_t len)
{
	int rc = -1;

#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_ZERO_RANGE)
	rc = fallocate(fd, FALLOC_FL_ZERO_RANGE, offset, len);
	if (!rc)
		return 0;
#endif
#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
	rc = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		       offset, len);
	if (!rc)
		return 0;
#endif
	/* Every failure is reported to the caller as "not supported". */
	errno = EOPNOTSUPP;
	return rc;
}
1390 | | |
1391 | | /* parameters might not be used if OS doesn't support zeroout */ |
1392 | | #if __GNUC_PREREQ (4, 6) |
1393 | | #pragma GCC diagnostic push |
1394 | | #pragma GCC diagnostic ignored "-Wunused-parameter" |
1395 | | #endif |
1396 | | static errcode_t unix_zeroout(io_channel channel, unsigned long long block, |
1397 | | unsigned long long count) |
1398 | 0 | { |
1399 | 0 | struct unix_private_data *data; |
1400 | 0 | int ret; |
1401 | |
|
1402 | 0 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); |
1403 | 0 | data = (struct unix_private_data *) channel->private_data; |
1404 | 0 | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); |
1405 | | |
1406 | 0 | if (!(channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE)) { |
1407 | | /* Regular file, try to use truncate/punch/zero. */ |
1408 | 0 | struct stat statbuf; |
1409 | |
|
1410 | 0 | if (count == 0) |
1411 | 0 | return 0; |
1412 | | /* |
1413 | | * If we're trying to zero a range past the end of the file, |
1414 | | * extend the file size, then truncate everything. |
1415 | | */ |
1416 | 0 | ret = fstat(data->dev, &statbuf); |
1417 | 0 | if (ret) |
1418 | 0 | goto err; |
1419 | 0 | if ((unsigned long long) statbuf.st_size < |
1420 | 0 | (block + count) * channel->block_size + data->offset) { |
1421 | 0 | ret = ftruncate(data->dev, |
1422 | 0 | (block + count) * channel->block_size + data->offset); |
1423 | 0 | if (ret) |
1424 | 0 | goto err; |
1425 | 0 | } |
1426 | 0 | } |
1427 | | |
1428 | 0 | if (channel->flags & CHANNEL_FLAGS_NOZEROOUT) |
1429 | 0 | goto unimplemented; |
1430 | | |
1431 | 0 | ret = __unix_zeroout(data->dev, |
1432 | 0 | (off_t)(block) * channel->block_size + data->offset, |
1433 | 0 | (off_t)(count) * channel->block_size); |
1434 | 0 | err: |
1435 | 0 | if (ret < 0) { |
1436 | 0 | if (errno == EOPNOTSUPP) { |
1437 | 0 | channel->flags |= CHANNEL_FLAGS_NOZEROOUT; |
1438 | 0 | goto unimplemented; |
1439 | 0 | } |
1440 | 0 | return errno; |
1441 | 0 | } |
1442 | 0 | return 0; |
1443 | 0 | unimplemented: |
1444 | 0 | return EXT2_ET_UNIMPLEMENTED; |
1445 | 0 | } |
1446 | | #if __GNUC_PREREQ (4, 6) |
1447 | | #pragma GCC diagnostic pop |
1448 | | #endif |
1449 | | |
1450 | | static struct struct_io_manager struct_unix_manager = { |
1451 | | .magic = EXT2_ET_MAGIC_IO_MANAGER, |
1452 | | .name = "Unix I/O Manager", |
1453 | | .open = unix_open, |
1454 | | .close = unix_close, |
1455 | | .set_blksize = unix_set_blksize, |
1456 | | .read_blk = unix_read_blk, |
1457 | | .write_blk = unix_write_blk, |
1458 | | .flush = unix_flush, |
1459 | | .write_byte = unix_write_byte, |
1460 | | .set_option = unix_set_option, |
1461 | | .get_stats = unix_get_stats, |
1462 | | .read_blk64 = unix_read_blk64, |
1463 | | .write_blk64 = unix_write_blk64, |
1464 | | .discard = unix_discard, |
1465 | | .cache_readahead = unix_cache_readahead, |
1466 | | .zeroout = unix_zeroout, |
1467 | | }; |
1468 | | |
1469 | | io_manager unix_io_manager = &struct_unix_manager; |
1470 | | |
1471 | | static struct struct_io_manager struct_unixfd_manager = { |
1472 | | .magic = EXT2_ET_MAGIC_IO_MANAGER, |
1473 | | .name = "Unix fd I/O Manager", |
1474 | | .open = unixfd_open, |
1475 | | .close = unix_close, |
1476 | | .set_blksize = unix_set_blksize, |
1477 | | .read_blk = unix_read_blk, |
1478 | | .write_blk = unix_write_blk, |
1479 | | .flush = unix_flush, |
1480 | | .write_byte = unix_write_byte, |
1481 | | .set_option = unix_set_option, |
1482 | | .get_stats = unix_get_stats, |
1483 | | .read_blk64 = unix_read_blk64, |
1484 | | .write_blk64 = unix_write_blk64, |
1485 | | .discard = unix_discard, |
1486 | | .cache_readahead = unix_cache_readahead, |
1487 | | .zeroout = unix_zeroout, |
1488 | | }; |
1489 | | |
1490 | | io_manager unixfd_io_manager = &struct_unixfd_manager; |