Coverage Report

Created: 2024-05-05 06:22

/src/e2fsprogs/lib/ext2fs/unix_io.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * unix_io.c --- This is the Unix (well, really POSIX) implementation
3
 *  of the I/O manager.
4
 *
5
 * Implements a one-block write-through cache.
6
 *
7
 * Includes support for Windows NT support under Cygwin.
8
 *
9
 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
10
 *  2002 by Theodore Ts'o.
11
 *
12
 * %Begin-Header%
13
 * This file may be redistributed under the terms of the GNU Library
14
 * General Public License, version 2.
15
 * %End-Header%
16
 */
17
18
#if !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
19
#define _XOPEN_SOURCE 600
20
#define _DARWIN_C_SOURCE
21
#ifndef _LARGEFILE_SOURCE
22
#define _LARGEFILE_SOURCE
23
#endif
24
#ifndef _LARGEFILE64_SOURCE
25
#define _LARGEFILE64_SOURCE
26
#endif
27
#ifndef _GNU_SOURCE
28
#define _GNU_SOURCE
29
#endif
30
#endif
31
32
#include "config.h"
33
#include <stdio.h>
34
#include <string.h>
35
#if HAVE_UNISTD_H
36
#include <unistd.h>
37
#endif
38
#if HAVE_ERRNO_H
39
#include <errno.h>
40
#endif
41
#include <fcntl.h>
42
#include <time.h>
43
#ifdef __linux__
44
#include <sys/utsname.h>
45
#endif
46
#if HAVE_SYS_TYPES_H
47
#include <sys/types.h>
48
#endif
49
#ifdef HAVE_SYS_IOCTL_H
50
#include <sys/ioctl.h>
51
#endif
52
#ifdef HAVE_SYS_MOUNT_H
53
#include <sys/mount.h>
54
#endif
55
#if HAVE_SYS_STAT_H
56
#include <sys/stat.h>
57
#endif
58
#if HAVE_SYS_RESOURCE_H
59
#include <sys/resource.h>
60
#endif
61
#if HAVE_LINUX_FALLOC_H
62
#include <linux/falloc.h>
63
#endif
64
#ifdef HAVE_PTHREAD
65
#include <pthread.h>
66
#endif
67
68
#if defined(__linux__) && defined(_IO) && !defined(BLKROGET)
69
#define BLKROGET   _IO(0x12, 94) /* Get read-only status (0 = read_write).  */
70
#endif
71
72
#undef ALIGN_DEBUG
73
74
#include "ext2_fs.h"
75
#include "ext2fs.h"
76
#include "ext2fsP.h"
77
78
/*
79
 * For checking structure magic numbers...
80
 */
81
82
/*
 * Return `code` from the enclosing function when obj->magic differs from
 * `code`.  The body is wrapped in do/while(0) so the macro expands to a
 * single statement: used as the body of an unbraced `if`, it can no longer
 * capture an `else` that follows it.
 */
#define EXT2_CHECK_MAGIC(obj, code) \
  do { \
    if ((obj)->magic != (code)) \
      return (code); \
  } while (0)
84
85
/*
 * One slot of the per-channel block cache.
 */
struct unix_cache {
  char      *buf;         /* block contents held by this slot */
  unsigned long long  block;  /* block number stored in buf */
  int     access_time;    /* channel access counter value at last use */
  unsigned    dirty:1;    /* slot is written out by a cache flush */
  unsigned    in_use:1;   /* slot currently holds a block */
  unsigned    write_err:1;  /* a write of this slot failed */
};
93
94
176k
/* Number of slots in each channel's block cache. */
#define CACHE_SIZE          8
/*
 * Block-count thresholds for bypassing the cache.
 * NOTE(review): the read path visible in this file compares against
 * WRITE_DIRECT_SIZE; confirm where READ_DIRECT_SIZE is used.
 */
#define WRITE_DIRECT_SIZE   4  /* Must be smaller than CACHE_SIZE */
#define READ_DIRECT_SIZE    4  /* Should be smaller than CACHE_SIZE */
97
98
struct unix_private_data {
99
  int magic;
100
  int dev;
101
  int flags;
102
  int align;
103
  int access_time;
104
  ext2_loff_t offset;
105
  struct unix_cache cache[CACHE_SIZE];
106
  void  *bounce;
107
  struct struct_io_stats io_stats;
108
#ifdef HAVE_PTHREAD
109
  pthread_mutex_t cache_mutex;
110
  pthread_mutex_t bounce_mutex;
111
  pthread_mutex_t stats_mutex;
112
#endif
113
};
114
115
0
/*
 * True when `n` (an integer or pointer value) is a multiple of `align`,
 * where `align` is a power of two.  Both arguments are parenthesized so a
 * compound expression such as `ptr + 1` is evaluated in full before the
 * cast to uintptr_t is applied.
 */
#define IS_ALIGNED(n, align) ((((uintptr_t) (n)) & \
             ((uintptr_t) ((align)-1))) == 0)
117
118
/* Selects which of a channel's three mutexes a lock helper operates on. */
typedef enum lock_kind {
  CACHE_MTX,    /* guards the block cache slots */
  BOUNCE_MTX,   /* guards the bounce buffer and seek+I/O sequences */
  STATS_MTX     /* guards the I/O statistics */
} kind_t;
121
122
#ifdef HAVE_PTHREAD
123
static inline pthread_mutex_t *get_mutex(struct unix_private_data *data,
124
           kind_t kind)
125
55.4k
{
126
55.4k
  if (data->flags & IO_FLAG_THREADS) {
127
0
    switch (kind) {
128
0
    case CACHE_MTX:
129
0
      return &data->cache_mutex;
130
0
    case BOUNCE_MTX:
131
0
      return &data->bounce_mutex;
132
0
    case STATS_MTX:
133
0
      return &data->stats_mutex;
134
0
    }
135
0
  }
136
55.4k
  return NULL;
137
55.4k
}
138
#endif
139
140
/*
 * Acquire the mutex of the given kind.  Does nothing when built without
 * pthread support or when get_mutex() yields no mutex for this channel.
 */
static inline void mutex_lock(struct unix_private_data *data, kind_t kind)
{
#ifdef HAVE_PTHREAD
  pthread_mutex_t *lock = get_mutex(data, kind);

  if (lock != NULL)
    pthread_mutex_lock(lock);
#endif
}
149
150
/*
 * Release the mutex of the given kind.  Does nothing when built without
 * pthread support or when get_mutex() yields no mutex for this channel.
 */
static inline void mutex_unlock(struct unix_private_data *data, kind_t kind)
{
#ifdef HAVE_PTHREAD
  pthread_mutex_t *lock = get_mutex(data, kind);

  if (lock != NULL)
    pthread_mutex_unlock(lock);
#endif
}
159
160
/*
 * Hand back a pointer to the channel's I/O statistics.
 *
 * When `stats` is non-NULL, *stats is set to point at the counters stored
 * in the channel's private data (no copy is made).  Returns the magic code
 * if either the channel or its private data fails the magic check,
 * otherwise 0.
 */
static errcode_t unix_get_stats(io_channel channel, io_stats *stats)
{
  struct unix_private_data *priv;

  EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
  priv = (struct unix_private_data *) channel->private_data;
  EXT2_CHECK_MAGIC(priv, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);

  if (!stats)
    return 0;

  mutex_lock(priv, STATS_MTX);
  *stats = &priv->io_stats;
  mutex_unlock(priv, STATS_MTX);
  return 0;
}
178
179
/*
180
 * Here are the raw I/O functions
181
 */
182
/*
 * Read from the device directly, bypassing the cache.
 *
 * A non-negative `count` is a number of blocks; a negative `count` means
 * "-count bytes".  The read starts at block `block` (scaled by the channel
 * block size, plus data->offset) and lands in `bufv`.
 *
 * Strategy:
 *  1. unless IO_FLAG_FORCE_BOUNCE is set, and if the buffer, location and
 *     size satisfy channel->align, try a positional read (when available);
 *  2. if that did not return the full size, seek and read() under
 *     BOUNCE_MTX;
 *  3. otherwise go through data->bounce one aligned chunk at a time.
 *
 * On failure the unread tail of the caller's buffer is zeroed and, if the
 * channel has a read_error handler, its return value becomes the result.
 */
static errcode_t raw_read_blk(io_channel channel,
            struct unix_private_data *data,
            unsigned long long block,
            int count, void *bufv)
{
  errcode_t retval;
  ssize_t   size;
  ext2_loff_t location;
  int   actual = 0;
  unsigned char *buf = bufv;
  ssize_t   really_read = 0;
  unsigned long long aligned_blk;
  int   align_size, offset;
  int   direct_ok;

  if (count < 0)
    size = -count;
  else
    size = (ext2_loff_t) count * channel->block_size;

  mutex_lock(data, STATS_MTX);
  data->io_stats.bytes_read += size;
  mutex_unlock(data, STATS_MTX);

  location = ((ext2_loff_t) block * channel->block_size) + data->offset;

  if (data->flags & IO_FLAG_FORCE_BOUNCE)
    goto bounce_read;

  /* None of the inputs change below, so the alignment test is done once. */
  direct_ok = (channel->align == 0) ||
        (IS_ALIGNED(buf, channel->align) &&
         IS_ALIGNED(location, channel->align) &&
         IS_ALIGNED(size, channel->align));

#ifdef HAVE_PREAD64
  /* Try an aligned pread */
  if (direct_ok) {
    actual = pread64(data->dev, buf, size, location);
    if (actual == size)
      return 0;
    actual = 0;
  }
#elif HAVE_PREAD
  /* Try an aligned pread */
  if ((sizeof(off_t) >= sizeof(ext2_loff_t)) && direct_ok) {
    actual = pread(data->dev, buf, size, location);
    if (actual == size)
      return 0;
    actual = 0;
  }
#endif /* HAVE_PREAD */

  if (direct_ok) {
    mutex_lock(data, BOUNCE_MTX);
    if (ext2fs_llseek(data->dev, location, SEEK_SET) < 0) {
      retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
      goto error_unlock;
    }
    actual = read(data->dev, buf, size);
    if (actual == size)
      goto success_unlock;
    goto short_read;
  }

#ifdef ALIGN_DEBUG
  printf("raw_read_blk: O_DIRECT fallback: %p %lu\n", buf,
         (unsigned long) size);
#endif

  /*
   * The buffer or size which we're trying to read isn't aligned
   * to the O_DIRECT rules, so we need to do this the hard way...
   */
bounce_read:
  if (channel->align == 0)
    channel->align = 1;
  if ((channel->block_size > channel->align) &&
      (channel->block_size % channel->align) == 0)
    align_size = channel->block_size;
  else
    align_size = channel->align;
  aligned_blk = location / align_size;
  offset = location % align_size;

  mutex_lock(data, BOUNCE_MTX);
  if (ext2fs_llseek(data->dev, aligned_blk * align_size, SEEK_SET) < 0) {
    retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
    goto error_unlock;
  }
  while (size > 0) {
    actual = read(data->dev, data->bounce, align_size);
    if (actual != align_size) {
      /* Rewind buf/size to the caller's values before reporting. */
      actual = really_read;
      buf -= really_read;
      size += really_read;
      goto short_read;
    }
    /* Only the part of this chunk past `offset` belongs to the caller. */
    if ((actual + offset) > align_size)
      actual = align_size - offset;
    if (actual > size)
      actual = size;
    memcpy(buf, (char *)data->bounce + offset, actual);

    really_read += actual;
    size -= actual;
    buf += actual;
    offset = 0;
    aligned_blk++;
  }
success_unlock:
  mutex_unlock(data, BOUNCE_MTX);
  return 0;

short_read:
  if (actual < 0) {
    retval = errno;
    actual = 0;
  } else
    retval = EXT2_ET_SHORT_READ;
error_unlock:
  mutex_unlock(data, BOUNCE_MTX);
  /* Zero whatever part of the caller's buffer was not filled. */
  if (actual >= 0 && actual < size)
    memset((char *) buf+actual, 0, size-actual);
  if (channel->read_error)
    retval = (channel->read_error)(channel, block, count, buf,
                 size, actual, retval);
  return retval;
}
310
311
0
#define RAW_WRITE_NO_HANDLER  1
312
313
static errcode_t raw_write_blk(io_channel channel,
314
             struct unix_private_data *data,
315
             unsigned long long block,
316
             int count, const void *bufv,
317
             int flags)
318
0
{
319
0
  ssize_t   size;
320
0
  ext2_loff_t location;
321
0
  int   actual = 0;
322
0
  errcode_t retval;
323
0
  const unsigned char *buf = bufv;
324
0
  unsigned long long aligned_blk;
325
0
  int   align_size, offset;
326
327
0
  if (count == 1)
328
0
    size = channel->block_size;
329
0
  else {
330
0
    if (count < 0)
331
0
      size = -count;
332
0
    else
333
0
      size = (ext2_loff_t) count * channel->block_size;
334
0
  }
335
0
  mutex_lock(data, STATS_MTX);
336
0
  data->io_stats.bytes_written += size;
337
0
  mutex_unlock(data, STATS_MTX);
338
339
0
  location = ((ext2_loff_t) block * channel->block_size) + data->offset;
340
341
0
  if (data->flags & IO_FLAG_FORCE_BOUNCE)
342
0
    goto bounce_write;
343
344
0
#ifdef HAVE_PWRITE64
345
  /* Try an aligned pwrite */
346
0
  if ((channel->align == 0) ||
347
0
      (IS_ALIGNED(buf, channel->align) &&
348
0
       IS_ALIGNED(location, channel->align) &&
349
0
       IS_ALIGNED(size, channel->align))) {
350
0
    actual = pwrite64(data->dev, buf, size, location);
351
0
    if (actual == size)
352
0
      return 0;
353
0
  }
354
#elif HAVE_PWRITE
355
  /* Try an aligned pwrite */
356
  if ((sizeof(off_t) >= sizeof(ext2_loff_t)) &&
357
      ((channel->align == 0) ||
358
       (IS_ALIGNED(buf, channel->align) &&
359
        IS_ALIGNED(location, channel->align) &&
360
        IS_ALIGNED(size, channel->align)))) {
361
    actual = pwrite(data->dev, buf, size, location);
362
    if (actual == size)
363
      return 0;
364
  }
365
#endif /* HAVE_PWRITE */
366
367
0
  if ((channel->align == 0) ||
368
0
      (IS_ALIGNED(buf, channel->align) &&
369
0
       IS_ALIGNED(location, channel->align) &&
370
0
       IS_ALIGNED(size, channel->align))) {
371
0
    mutex_lock(data, BOUNCE_MTX);
372
0
    if (ext2fs_llseek(data->dev, location, SEEK_SET) < 0) {
373
0
      retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
374
0
      goto error_unlock;
375
0
    }
376
0
    actual = write(data->dev, buf, size);
377
0
    mutex_unlock(data, BOUNCE_MTX);
378
0
    if (actual < 0) {
379
0
      retval = errno;
380
0
      goto error_out;
381
0
    }
382
0
    if (actual != size) {
383
0
    short_write:
384
0
      retval = EXT2_ET_SHORT_WRITE;
385
0
      goto error_out;
386
0
    }
387
0
    return 0;
388
0
  }
389
390
#ifdef ALIGN_DEBUG
391
  printf("raw_write_blk: O_DIRECT fallback: %p %lu\n", buf,
392
         (unsigned long) size);
393
#endif
394
  /*
395
   * The buffer or size which we're trying to write isn't aligned
396
   * to the O_DIRECT rules, so we need to do this the hard way...
397
   */
398
0
bounce_write:
399
0
  if (channel->align == 0)
400
0
    channel->align = 1;
401
0
  if ((channel->block_size > channel->align) &&
402
0
      (channel->block_size % channel->align) == 0)
403
0
    align_size = channel->block_size;
404
0
  else
405
0
    align_size = channel->align;
406
0
  aligned_blk = location / align_size;
407
0
  offset = location % align_size;
408
409
0
  while (size > 0) {
410
0
    int actual_w;
411
412
0
    mutex_lock(data, BOUNCE_MTX);
413
0
    if (size < align_size || offset) {
414
0
      if (ext2fs_llseek(data->dev, aligned_blk * align_size,
415
0
            SEEK_SET) < 0) {
416
0
        retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
417
0
        goto error_unlock;
418
0
      }
419
0
      actual = read(data->dev, data->bounce,
420
0
              align_size);
421
0
      if (actual != align_size) {
422
0
        if (actual < 0) {
423
0
          retval = errno;
424
0
          goto error_unlock;
425
0
        }
426
0
        memset((char *) data->bounce + actual, 0,
427
0
               align_size - actual);
428
0
      }
429
0
    }
430
0
    actual = size;
431
0
    if ((actual + offset) > align_size)
432
0
      actual = align_size - offset;
433
0
    if (actual > size)
434
0
      actual = size;
435
0
    memcpy(((char *)data->bounce) + offset, buf, actual);
436
0
    if (ext2fs_llseek(data->dev, aligned_blk * align_size, SEEK_SET) < 0) {
437
0
      retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
438
0
      goto error_unlock;
439
0
    }
440
0
    actual_w = write(data->dev, data->bounce, align_size);
441
0
    mutex_unlock(data, BOUNCE_MTX);
442
0
    if (actual_w < 0) {
443
0
      retval = errno;
444
0
      goto error_out;
445
0
    }
446
0
    if (actual_w != align_size)
447
0
      goto short_write;
448
0
    size -= actual;
449
0
    buf += actual;
450
0
    location += actual;
451
0
    aligned_blk++;
452
0
    offset = 0;
453
0
  }
454
0
  return 0;
455
456
0
error_unlock:
457
0
  mutex_unlock(data, BOUNCE_MTX);
458
0
error_out:
459
0
  if (((flags & RAW_WRITE_NO_HANDLER) == 0) && channel->write_error)
460
0
    retval = (channel->write_error)(channel, block, count, buf,
461
0
            size, actual, retval);
462
0
  return retval;
463
0
}
464
465
466
/*
467
 * Here we implement the cache functions
468
 */
469
470
/* Allocate the cache buffers */
471
static errcode_t alloc_cache(io_channel channel,
472
           struct unix_private_data *data)
473
1.27k
{
474
1.27k
  errcode_t   retval;
475
1.27k
  struct unix_cache *cache;
476
1.27k
  int     i;
477
478
1.27k
  data->access_time = 0;
479
11.4k
  for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
480
10.1k
    cache->block = 0;
481
10.1k
    cache->access_time = 0;
482
10.1k
    cache->dirty = 0;
483
10.1k
    cache->in_use = 0;
484
10.1k
    if (cache->buf)
485
0
      ext2fs_free_mem(&cache->buf);
486
10.1k
    retval = io_channel_alloc_buf(channel, 0, &cache->buf);
487
10.1k
    if (retval)
488
0
      return retval;
489
10.1k
  }
490
1.27k
  if (channel->align || data->flags & IO_FLAG_FORCE_BOUNCE) {
491
0
    if (data->bounce)
492
0
      ext2fs_free_mem(&data->bounce);
493
0
    retval = io_channel_alloc_buf(channel, 0, &data->bounce);
494
0
  }
495
1.27k
  return retval;
496
1.27k
}
497
498
/* Free the cache buffers */
499
static void free_cache(struct unix_private_data *data)
500
1.27k
{
501
1.27k
  struct unix_cache *cache;
502
1.27k
  int     i;
503
504
1.27k
  data->access_time = 0;
505
11.4k
  for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
506
10.1k
    cache->block = 0;
507
10.1k
    cache->access_time = 0;
508
10.1k
    cache->dirty = 0;
509
10.1k
    cache->in_use = 0;
510
10.1k
    if (cache->buf)
511
10.1k
      ext2fs_free_mem(&cache->buf);
512
10.1k
  }
513
1.27k
  if (data->bounce)
514
0
    ext2fs_free_mem(&data->bounce);
515
1.27k
}
516
517
#ifndef NO_IO_CACHE
518
/*
519
 * Try to find a block in the cache.  If the block is not found, and
520
 * eldest is a non-zero pointer, then fill in eldest with the cache
521
 * entry to that should be reused.
522
 */
523
static struct unix_cache *find_cached_block(struct unix_private_data *data,
524
              unsigned long long block,
525
              struct unix_cache **eldest)
526
20.5k
{
527
20.5k
  struct unix_cache *cache, *unused_cache, *oldest_cache;
528
20.5k
  int     i;
529
530
20.5k
  unused_cache = oldest_cache = 0;
531
115k
  for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
532
107k
    if (!cache->in_use) {
533
23.1k
      if (!unused_cache)
534
3.69k
        unused_cache = cache;
535
23.1k
      continue;
536
23.1k
    }
537
84.4k
    if (cache->block == block) {
538
12.4k
      cache->access_time = ++data->access_time;
539
12.4k
      return cache;
540
12.4k
    }
541
72.0k
    if (!oldest_cache ||
542
72.0k
        (cache->access_time < oldest_cache->access_time))
543
28.3k
      oldest_cache = cache;
544
72.0k
  }
545
8.13k
  if (eldest)
546
3.96k
    *eldest = (unused_cache) ? unused_cache : oldest_cache;
547
8.13k
  return 0;
548
20.5k
}
549
550
/*
551
 * Reuse a particular cache entry for another block.
552
 */
553
static errcode_t reuse_cache(io_channel channel,
554
    struct unix_private_data *data, struct unix_cache *cache,
555
    unsigned long long block)
556
3.96k
{
557
3.96k
  if (cache->dirty && cache->in_use) {
558
0
    errcode_t retval;
559
560
0
    retval = raw_write_blk(channel, data, cache->block, 1,
561
0
               cache->buf, RAW_WRITE_NO_HANDLER);
562
0
    if (retval) {
563
0
      cache->write_err = 1;
564
0
      return retval;
565
0
    }
566
0
  }
567
568
3.96k
  cache->in_use = 1;
569
3.96k
  cache->dirty = 0;
570
3.96k
  cache->write_err = 0;
571
3.96k
  cache->block = block;
572
3.96k
  cache->access_time = ++data->access_time;
573
3.96k
  return 0;
574
3.96k
}
575
576
0
#define FLUSH_INVALIDATE  0x01
577
8.51k
#define FLUSH_NOLOCK    0x02
578
579
/*
580
 * Flush all of the blocks in the cache
581
 */
582
static errcode_t flush_cached_blocks(io_channel channel,
583
             struct unix_private_data *data,
584
             int flags)
585
4.23k
{
586
4.23k
  struct unix_cache *cache;
587
4.23k
  errcode_t   retval, retval2 = 0;
588
4.23k
  int     i;
589
4.23k
  int     errors_found = 0;
590
591
4.23k
  if ((flags & FLUSH_NOLOCK) == 0)
592
4.19k
    mutex_lock(data, CACHE_MTX);
593
38.1k
  for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
594
33.8k
    if (!cache->in_use || !cache->dirty)
595
33.8k
      continue;
596
0
    retval = raw_write_blk(channel, data,
597
0
               cache->block, 1, cache->buf,
598
0
               RAW_WRITE_NO_HANDLER);
599
0
    if (retval) {
600
0
      cache->write_err = 1;
601
0
      errors_found = 1;
602
0
      retval2 = retval;
603
0
    } else {
604
0
      cache->dirty = 0;
605
0
      cache->write_err = 0;
606
0
      if (flags & FLUSH_INVALIDATE)
607
0
        cache->in_use = 0;
608
0
    }
609
0
  }
610
4.23k
  if ((flags & FLUSH_NOLOCK) == 0)
611
4.19k
    mutex_unlock(data, CACHE_MTX);
612
4.23k
retry:
613
4.23k
  while (errors_found) {
614
0
    if ((flags & FLUSH_NOLOCK) == 0)
615
0
      mutex_lock(data, CACHE_MTX);
616
0
    errors_found = 0;
617
0
    for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
618
0
      if (!cache->in_use || !cache->write_err)
619
0
        continue;
620
0
      errors_found = 1;
621
0
      if (cache->write_err && channel->write_error) {
622
0
        char *err_buf = NULL;
623
0
        unsigned long long err_block = cache->block;
624
625
0
        cache->dirty = 0;
626
0
        cache->in_use = 0;
627
0
        cache->write_err = 0;
628
0
        if (io_channel_alloc_buf(channel, 0,
629
0
               &err_buf))
630
0
          err_buf = NULL;
631
0
        else
632
0
          memcpy(err_buf, cache->buf,
633
0
                 channel->block_size);
634
0
        mutex_unlock(data, CACHE_MTX);
635
0
        (channel->write_error)(channel, err_block,
636
0
          1, err_buf, channel->block_size, -1,
637
0
          retval2);
638
0
        if (err_buf)
639
0
          ext2fs_free_mem(&err_buf);
640
0
        goto retry;
641
0
      } else
642
0
        cache->write_err = 0;
643
0
    }
644
0
    if ((flags & FLUSH_NOLOCK) == 0)
645
0
      mutex_unlock(data, CACHE_MTX);
646
0
  }
647
4.23k
  return retval2;
648
4.23k
}
649
#endif /* NO_IO_CACHE */
650
651
#ifdef __linux__
652
#ifndef BLKDISCARDZEROES
653
0
#define BLKDISCARDZEROES _IO(0x12,124)
654
#endif
655
#endif
656
657
/*
 * Open `pathname` with the large-file variant of open() when the build
 * provides one.  A non-zero `mode` is forwarded as the third argument;
 * a zero `mode` results in the two-argument call.  Returns whatever the
 * underlying open call returns.
 */
int ext2fs_open_file(const char *pathname, int flags, mode_t mode)
{
#if defined(HAVE_OPEN64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
  return mode ? open64(pathname, flags, mode)
        : open64(pathname, flags);
#else
  return mode ? open(pathname, flags, mode)
        : open(pathname, flags);
#endif
}
670
671
/*
 * stat() wrapper that uses the 64-bit variant when the build selects it.
 * Returns the result of the underlying call unchanged.
 */
int ext2fs_stat(const char *path, ext2fs_struct_stat *buf)
{
  int rc;

#if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
  rc = stat64(path, buf);
#else
  rc = stat(path, buf);
#endif
  return rc;
}
679
680
/*
 * fstat() wrapper that uses the 64-bit variant when the build selects it.
 * Returns the result of the underlying call unchanged.
 */
int ext2fs_fstat(int fd, ext2fs_struct_stat *buf)
{
  int rc;

#if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
  rc = fstat64(fd, buf);
#else
  rc = fstat(fd, buf);
#endif
  return rc;
}
688
689
690
static errcode_t unix_open_channel(const char *name, int fd,
691
           int flags, io_channel *channel,
692
           io_manager io_mgr)
693
1.23k
{
694
1.23k
  io_channel  io = NULL;
695
1.23k
  struct unix_private_data *data = NULL;
696
1.23k
  errcode_t retval;
697
1.23k
  ext2fs_struct_stat st;
698
1.23k
#ifdef __linux__
699
1.23k
  struct    utsname ut;
700
1.23k
#endif
701
702
1.23k
  if (ext2fs_safe_getenv("UNIX_IO_FORCE_BOUNCE"))
703
0
    flags |= IO_FLAG_FORCE_BOUNCE;
704
705
1.23k
#ifdef __linux__
706
  /*
707
   * We need to make sure any previous errors in the block
708
   * device are thrown away, sigh.
709
   */
710
1.23k
  (void) fsync(fd);
711
1.23k
#endif
712
713
1.23k
  retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
714
1.23k
  if (retval)
715
0
    goto cleanup;
716
1.23k
  memset(io, 0, sizeof(struct struct_io_channel));
717
1.23k
  io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
718
1.23k
  retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data);
719
1.23k
  if (retval)
720
0
    goto cleanup;
721
722
1.23k
  io->manager = io_mgr;
723
1.23k
  retval = ext2fs_get_mem(strlen(name)+1, &io->name);
724
1.23k
  if (retval)
725
0
    goto cleanup;
726
727
1.23k
  strcpy(io->name, name);
728
1.23k
  io->private_data = data;
729
1.23k
  io->block_size = 1024;
730
1.23k
  io->read_error = 0;
731
1.23k
  io->write_error = 0;
732
1.23k
  io->refcount = 1;
733
1.23k
  io->flags = 0;
734
735
1.23k
  if (ext2fs_safe_getenv("UNIX_IO_NOZEROOUT"))
736
0
    io->flags |= CHANNEL_FLAGS_NOZEROOUT;
737
738
1.23k
  memset(data, 0, sizeof(struct unix_private_data));
739
1.23k
  data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
740
1.23k
  data->io_stats.num_fields = 2;
741
1.23k
  data->flags = flags;
742
1.23k
  data->dev = fd;
743
744
1.23k
#if defined(O_DIRECT)
745
1.23k
  if (flags & IO_FLAG_DIRECT_IO)
746
0
    io->align = ext2fs_get_dio_alignment(data->dev);
747
#elif defined(F_NOCACHE)
748
  if (flags & IO_FLAG_DIRECT_IO)
749
    io->align = 4096;
750
#endif
751
752
  /*
753
   * If the device is really a block device, then set the
754
   * appropriate flag, otherwise we can set DISCARD_ZEROES flag
755
   * because we are going to use punch hole instead of discard
756
   * and if it succeed, subsequent read from sparse area returns
757
   * zero.
758
   */
759
1.23k
  if (ext2fs_fstat(data->dev, &st) == 0) {
760
1.23k
    if (ext2fsP_is_disk_device(st.st_mode)) {
761
0
#ifdef BLKDISCARDZEROES
762
0
      int zeroes = 0;
763
764
0
      if (ioctl(data->dev, BLKDISCARDZEROES, &zeroes) == 0 &&
765
0
          zeroes)
766
0
        io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES;
767
0
#endif
768
0
      io->flags |= CHANNEL_FLAGS_BLOCK_DEVICE;
769
1.23k
    } else {
770
1.23k
      io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES;
771
1.23k
    }
772
1.23k
  }
773
774
#if defined(__CYGWIN__)
775
  /*
776
   * Some operating systems require that the buffers be aligned,
777
   * regardless of O_DIRECT
778
   */
779
  if (!io->align)
780
    io->align = 512;
781
#endif
782
783
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
784
  if (io->flags & CHANNEL_FLAGS_BLOCK_DEVICE) {
785
    int dio_align = ext2fs_get_dio_alignment(fd);
786
787
    if (io->align < dio_align)
788
      io->align = dio_align;
789
  }
790
#endif
791
792
1.23k
  if ((retval = alloc_cache(io, data)))
793
0
    goto cleanup;
794
795
1.23k
#ifdef BLKROGET
796
1.23k
  if (flags & IO_FLAG_RW) {
797
0
    int error;
798
0
    int readonly = 0;
799
800
    /* Is the block device actually writable? */
801
0
    error = ioctl(data->dev, BLKROGET, &readonly);
802
0
    if (!error && readonly) {
803
0
      retval = EPERM;
804
0
      goto cleanup;
805
0
    }
806
0
  }
807
1.23k
#endif
808
809
1.23k
#ifdef __linux__
810
1.23k
#undef RLIM_INFINITY
811
#if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4)))
812
#define RLIM_INFINITY ((unsigned long)(~0UL>>1))
813
#else
814
1.23k
#define RLIM_INFINITY  (~0UL)
815
1.23k
#endif
816
  /*
817
   * Work around a bug in 2.4.10-2.4.18 kernels where writes to
818
   * block devices are wrongly getting hit by the filesize
819
   * limit.  This workaround isn't perfect, since it won't work
820
   * if glibc wasn't built against 2.2 header files.  (Sigh.)
821
   *
822
   */
823
1.23k
  if ((flags & IO_FLAG_RW) &&
824
1.23k
      (uname(&ut) == 0) &&
825
1.23k
      ((ut.release[0] == '2') && (ut.release[1] == '.') &&
826
0
       (ut.release[2] == '4') && (ut.release[3] == '.') &&
827
0
       (ut.release[4] == '1') && (ut.release[5] >= '0') &&
828
0
       (ut.release[5] < '8')) &&
829
1.23k
      (ext2fs_fstat(data->dev, &st) == 0) &&
830
1.23k
      (ext2fsP_is_disk_device(st.st_mode))) {
831
0
    struct rlimit rlim;
832
833
0
    rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY;
834
0
    setrlimit(RLIMIT_FSIZE, &rlim);
835
0
    getrlimit(RLIMIT_FSIZE, &rlim);
836
0
    if (((unsigned long) rlim.rlim_cur) <
837
0
        ((unsigned long) rlim.rlim_max)) {
838
0
      rlim.rlim_cur = rlim.rlim_max;
839
0
      setrlimit(RLIMIT_FSIZE, &rlim);
840
0
    }
841
0
  }
842
1.23k
#endif
843
1.23k
#ifdef HAVE_PTHREAD
844
1.23k
  if (flags & IO_FLAG_THREADS) {
845
0
    io->flags |= CHANNEL_FLAGS_THREADS;
846
0
    retval = pthread_mutex_init(&data->cache_mutex, NULL);
847
0
    if (retval)
848
0
      goto cleanup;
849
0
    retval = pthread_mutex_init(&data->bounce_mutex, NULL);
850
0
    if (retval) {
851
0
      pthread_mutex_destroy(&data->cache_mutex);
852
0
      goto cleanup;
853
0
    }
854
0
    retval = pthread_mutex_init(&data->stats_mutex, NULL);
855
0
    if (retval) {
856
0
      pthread_mutex_destroy(&data->cache_mutex);
857
0
      pthread_mutex_destroy(&data->bounce_mutex);
858
0
      goto cleanup;
859
0
    }
860
0
  }
861
1.23k
#endif
862
1.23k
  *channel = io;
863
1.23k
  return 0;
864
865
0
cleanup:
866
0
  if (data) {
867
0
    if (data->dev >= 0)
868
0
      close(data->dev);
869
0
    free_cache(data);
870
0
    ext2fs_free_mem(&data);
871
0
  }
872
0
  if (io) {
873
0
    if (io->name) {
874
0
      ext2fs_free_mem(&io->name);
875
0
    }
876
0
    ext2fs_free_mem(&io);
877
0
  }
878
0
  return retval;
879
1.23k
}
880
881
static errcode_t unixfd_open(const char *str_fd, int flags,
882
           io_channel *channel)
883
0
{
884
0
  int fd;
885
0
  int fd_flags;
886
887
0
  fd = atoi(str_fd);
888
0
#if defined(HAVE_FCNTL)
889
0
  fd_flags = fcntl(fd, F_GETFD);
890
0
  if (fd_flags == -1)
891
0
    return EBADF;
892
893
0
  flags = 0;
894
0
  if (fd_flags & O_RDWR)
895
0
    flags |= IO_FLAG_RW;
896
0
  if (fd_flags & O_EXCL)
897
0
    flags |= IO_FLAG_EXCLUSIVE;
898
0
#if defined(O_DIRECT)
899
0
  if (fd_flags & O_DIRECT)
900
0
    flags |= IO_FLAG_DIRECT_IO;
901
0
#endif
902
0
#endif  /* HAVE_FCNTL */
903
904
0
  return unix_open_channel(str_fd, fd, flags, channel, unixfd_io_manager);
905
0
}
906
907
static errcode_t unix_open(const char *name, int flags,
908
         io_channel *channel)
909
1.23k
{
910
1.23k
  int fd = -1;
911
1.23k
  int open_flags;
912
913
1.23k
  if (name == 0)
914
0
    return EXT2_ET_BAD_DEVICE_NAME;
915
916
1.23k
  open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY;
917
1.23k
  if (flags & IO_FLAG_EXCLUSIVE)
918
0
    open_flags |= O_EXCL;
919
1.23k
#if defined(O_DIRECT)
920
1.23k
  if (flags & IO_FLAG_DIRECT_IO)
921
0
    open_flags |= O_DIRECT;
922
1.23k
#endif
923
1.23k
  fd = ext2fs_open_file(name, open_flags, 0);
924
1.23k
  if (fd < 0)
925
0
    return errno;
926
#if defined(F_NOCACHE) && !defined(IO_DIRECT)
927
  if (flags & IO_FLAG_DIRECT_IO) {
928
    if (fcntl(fd, F_NOCACHE, 1) < 0)
929
      return errno;
930
  }
931
#endif
932
1.23k
  return unix_open_channel(name, fd, flags, channel, unix_io_manager);
933
1.23k
}
934
935
/*
 * Drop one reference to the channel and tear it down when the count
 * reaches zero.
 *
 * Teardown flushes the cache (unless built with NO_IO_CACHE), closes the
 * descriptor, frees the cache buffers, destroys the mutexes of a threaded
 * channel, and frees the private data, the name and the channel itself.
 *
 * Returns 0 while references remain.  Otherwise returns errno from a
 * failed close(), else the result of the flush.
 */
static errcode_t unix_close(io_channel channel)
{
  struct unix_private_data *priv;
  errcode_t status = 0;

  EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
  priv = (struct unix_private_data *) channel->private_data;
  EXT2_CHECK_MAGIC(priv, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);

  channel->refcount--;
  if (channel->refcount > 0)
    return 0;

#ifndef NO_IO_CACHE
  status = flush_cached_blocks(channel, priv, 0);
#endif

  /* A close failure takes precedence over a flush error. */
  if (close(priv->dev) < 0)
    status = errno;
  free_cache(priv);
#ifdef HAVE_PTHREAD
  if (priv->flags & IO_FLAG_THREADS) {
    pthread_mutex_destroy(&priv->cache_mutex);
    pthread_mutex_destroy(&priv->bounce_mutex);
    pthread_mutex_destroy(&priv->stats_mutex);
  }
#endif

  ext2fs_free_mem(&channel->private_data);
  if (channel->name)
    ext2fs_free_mem(&channel->name);
  ext2fs_free_mem(&channel);
  return status;
}
968
969
/*
 * Change the channel's block size.
 *
 * Nothing happens when the size is unchanged.  Otherwise the cache is
 * flushed (unless built with NO_IO_CACHE), the new size is recorded, and
 * the cache and bounce buffers are freed and re-allocated.
 *
 * Locking: CACHE_MTX is held for the whole operation.  BOUNCE_MTX is
 * taken only after the flush, because flushing calls raw_write_blk(),
 * which acquires BOUNCE_MTX itself; holding it across the flush would
 * make a threaded channel lock the same mutex twice.  The order
 * CACHE_MTX then BOUNCE_MTX matches the order used during the flush.
 *
 * Returns the magic code on a failed magic check, the flush error if the
 * flush fails (block size left unchanged), otherwise the result of
 * re-allocating the cache.
 */
static errcode_t unix_set_blksize(io_channel channel, int blksize)
{
  struct unix_private_data *data;
  errcode_t   retval = 0;

  EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
  data = (struct unix_private_data *) channel->private_data;
  EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);

  if (channel->block_size == blksize)
    return 0;

  mutex_lock(data, CACHE_MTX);
#ifndef NO_IO_CACHE
  retval = flush_cached_blocks(channel, data, FLUSH_NOLOCK);
  if (retval) {
    mutex_unlock(data, CACHE_MTX);
    return retval;
  }
#endif

  /* The bounce buffer is about to be replaced: exclude its users. */
  mutex_lock(data, BOUNCE_MTX);
  channel->block_size = blksize;
  free_cache(data);
  retval = alloc_cache(channel, data);
  mutex_unlock(data, BOUNCE_MTX);
  mutex_unlock(data, CACHE_MTX);
  return retval;
}
997
998
/*
 * Read 'count' blocks starting at 'block' into 'buf'.
 *
 * When NO_IO_CACHE is defined, or IO_FLAG_NOCACHE is set in the private
 * data, the request is passed straight to raw_read_blk().  A negative
 * count, or one larger than WRITE_DIRECT_SIZE, flushes the cache and
 * then reads directly as well.  Otherwise each block is copied from the
 * cache when present; a run of consecutive uncached blocks is fetched
 * with one raw_read_blk() call and then stored into cache slots.
 *
 * Returns 0 on success, or the error reported by the flush, the raw
 * read, or reuse_cache().
 */
static errcode_t unix_read_blk64(io_channel channel, unsigned long long block,
				 int count, void *buf)
{
	struct unix_private_data *data;
	struct unix_cache *cache;
	errcode_t	retval;
	char		*cp;
	int		i, j;

	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
	data = (struct unix_private_data *) channel->private_data;
	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);

#ifdef NO_IO_CACHE
	return raw_read_blk(channel, data, block, count, buf);
#else
	if (data->flags & IO_FLAG_NOCACHE)
		return raw_read_blk(channel, data, block, count, buf);
	/*
	 * If we're doing an odd-sized read or a very large read,
	 * flush out the cache and then do a direct read.
	 */
	if (count < 0 || count > WRITE_DIRECT_SIZE) {
		if ((retval = flush_cached_blocks(channel, data, 0)))
			return retval;
		return raw_read_blk(channel, data, block, count, buf);
	}

	cp = buf;
	mutex_lock(data, CACHE_MTX);
	while (count > 0) {
		/* If it's in the cache, use it! */
		if ((cache = find_cached_block(data, block, NULL))) {
#ifdef DEBUG
			/* 'block' is unsigned long long, so %llu is required */
			printf("Using cached block %llu\n", block);
#endif
			memcpy(cp, cache->buf, channel->block_size);
			count--;
			block++;
			cp += channel->block_size;
			continue;
		}

		/*
		 * Find the number of uncached blocks so we can do a
		 * single read request
		 */
		for (i=1; i < count; i++)
			if (find_cached_block(data, block+i, NULL))
				break;
#ifdef DEBUG
		printf("Reading %d blocks starting at %llu\n", i, block);
#endif
		/*
		 * The cache mutex is dropped for the duration of the raw
		 * read, so an error return here leaves nothing locked.
		 */
		mutex_unlock(data, CACHE_MTX);
		if ((retval = raw_read_blk(channel, data, block, i, cp)))
			return retval;
		mutex_lock(data, CACHE_MTX);

		/* Save the results in the cache */
		for (j=0; j < i; j++) {
			/*
			 * Look the block up again: the mutex was released
			 * above, so it may have been cached in the meantime.
			 * On a miss, 'cache' is set to the slot to reuse.
			 */
			if (!find_cached_block(data, block, &cache)) {
				retval = reuse_cache(channel, data,
						     cache, block);
				if (retval)
					goto call_write_handler;
				memcpy(cache->buf, cp, channel->block_size);
			}
			count--;
			block++;
			cp += channel->block_size;
		}
	}
	mutex_unlock(data, CACHE_MTX);
	return 0;

call_write_handler:
	/*
	 * reuse_cache() failed with CACHE_MTX held.  If the slot is flagged
	 * with a write error and the channel has a write_error callback,
	 * reset the slot, copy its contents aside, and invoke the callback
	 * after the mutex has been released.
	 */
	if (cache->write_err && channel->write_error) {
		char *err_buf = NULL;
		unsigned long long err_block = cache->block;

		cache->dirty = 0;
		cache->in_use = 0;
		cache->write_err = 0;
		if (io_channel_alloc_buf(channel, 0, &err_buf))
			err_buf = NULL;
		else
			memcpy(err_buf, cache->buf, channel->block_size);
		mutex_unlock(data, CACHE_MTX);
		(channel->write_error)(channel, err_block, 1, err_buf,
				       channel->block_size, -1,
				       retval);
		if (err_buf)
			ext2fs_free_mem(&err_buf);
	} else
		mutex_unlock(data, CACHE_MTX);
	return retval;
#endif /* NO_IO_CACHE */
}
1096
1097
/*
 * Variant of unix_read_blk64() taking an 'unsigned long' block number:
 * widens the block number and forwards the call unchanged.
 */
static errcode_t unix_read_blk(io_channel channel, unsigned long block,
			       int count, void *buf)
{
	unsigned long long blk64 = block;

	return unix_read_blk64(channel, blk64, count, buf);
}
1102
1103
/*
 * Write 'count' blocks from 'buf' starting at 'block'.
 *
 * When NO_IO_CACHE is defined, or IO_FLAG_NOCACHE is set in the private
 * data, the request is passed straight to raw_write_blk().  A negative
 * count, or one larger than WRITE_DIRECT_SIZE, flushes and invalidates
 * the cache and then writes directly.  Otherwise the blocks are copied
 * into the cache; with CHANNEL_FLAGS_WRITETHROUGH set they are first
 * written with raw_write_blk() and the cache entries are left clean,
 * without it the entries are marked dirty.
 *
 * Returns 0 on success, or the error from the flush, the raw write, or
 * reuse_cache().
 */
static errcode_t unix_write_blk64(io_channel channel, unsigned long long block,
				  int count, const void *buf)
{
	struct unix_private_data *data;
	struct unix_cache *cache, *reuse;
	errcode_t	retval = 0;
	const char	*cp;
	int		writethrough;

	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
	data = (struct unix_private_data *) channel->private_data;
	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);

#ifdef NO_IO_CACHE
	return raw_write_blk(channel, data, block, count, buf, 0);
#else
	if (data->flags & IO_FLAG_NOCACHE)
		return raw_write_blk(channel, data, block, count, buf, 0);
	/*
	 * If we're doing an odd-sized write or a very large write,
	 * flush out the cache completely and then do a direct write.
	 */
	if (count < 0 || count > WRITE_DIRECT_SIZE) {
		if ((retval = flush_cached_blocks(channel, data,
						  FLUSH_INVALIDATE)))
			return retval;
		return raw_write_blk(channel, data, block, count, buf, 0);
	}

	/*
	 * For a moderate-sized multi-block write, first force a write
	 * if we're in write-through cache mode, and then fill the
	 * cache with the blocks.
	 */
	writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
	if (writethrough)
		retval = raw_write_blk(channel, data, block, count, buf, 0);

	cp = buf;
	mutex_lock(data, CACHE_MTX);
	while (count > 0) {
		cache = find_cached_block(data, block, &reuse);
		if (!cache) {
			errcode_t err;

			cache = reuse;
			err = reuse_cache(channel, data, cache, block);
			if (err) {
				/*
				 * Propagate the failure: the handler below
				 * reports and returns 'retval', which would
				 * otherwise still be 0 (or a stale
				 * write-through status) and hide the error.
				 */
				retval = err;
				goto call_write_handler;
			}
		}
		if (cache->buf != cp)
			memcpy(cache->buf, cp, channel->block_size);
		/* Already on disk in write-through mode, so not dirty. */
		cache->dirty = !writethrough;
		count--;
		block++;
		cp += channel->block_size;
	}
	mutex_unlock(data, CACHE_MTX);
	return retval;

call_write_handler:
	/*
	 * reuse_cache() failed with CACHE_MTX held.  If the slot is flagged
	 * with a write error and the channel has a write_error callback,
	 * reset the slot, copy its contents aside, and invoke the callback
	 * after the mutex has been released.
	 */
	if (cache->write_err && channel->write_error) {
		char *err_buf = NULL;
		unsigned long long err_block = cache->block;

		cache->dirty = 0;
		cache->in_use = 0;
		cache->write_err = 0;
		if (io_channel_alloc_buf(channel, 0, &err_buf))
			err_buf = NULL;
		else
			memcpy(err_buf, cache->buf, channel->block_size);
		mutex_unlock(data, CACHE_MTX);
		(channel->write_error)(channel, err_block, 1, err_buf,
				       channel->block_size, -1,
				       retval);
		if (err_buf)
			ext2fs_free_mem(&err_buf);
	} else
		mutex_unlock(data, CACHE_MTX);
	return retval;
#endif /* NO_IO_CACHE */
}
1186
1187
/*
 * Advise the kernel that 'count' blocks starting at 'block' will be
 * needed soon.
 *
 * When POSIX_FADV_WILLNEED is available this validates the channel and
 * its private data, then issues posix_fadvise() on the byte range
 * (block and count scaled by the channel block size, shifted by the
 * private data's offset) and returns its result.  Without
 * POSIX_FADV_WILLNEED it returns EXT2_ET_OP_NOT_SUPPORTED.
 */
static errcode_t unix_cache_readahead(io_channel channel,
				      unsigned long long block,
				      unsigned long long count)
{
#ifdef POSIX_FADV_WILLNEED
	struct unix_private_data *data;

	/* Validate the channel before touching it, like the other methods. */
	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
	data = (struct unix_private_data *)channel->private_data;
	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
	return posix_fadvise(data->dev,
			     (ext2_loff_t)block * channel->block_size + data->offset,
			     (ext2_loff_t)count * channel->block_size,
			     POSIX_FADV_WILLNEED);
#else
	return EXT2_ET_OP_NOT_SUPPORTED;
#endif
}
1204
1205
/*
 * Variant of unix_write_blk64() taking an 'unsigned long' block number:
 * widens the block number and forwards the call unchanged.
 */
static errcode_t unix_write_blk(io_channel channel, unsigned long block,
				int count, const void *buf)
{
	unsigned long long blk64 = block;

	return unix_write_blk64(channel, blk64, count, buf);
}
1210
1211
/*
 * Write 'size' bytes from 'buf' at byte position 'offset' (relative to
 * the private data's offset) on the underlying descriptor.
 *
 * The request is refused with EXT2_ET_UNIMPLEMENTED when the channel has
 * a non-zero alignment requirement.  Unless NO_IO_CACHE is defined, the
 * block cache is flushed and invalidated before the write.
 *
 * Returns 0 on success, the flush error, errno from lseek()/write(), or
 * EXT2_ET_SHORT_WRITE when fewer than 'size' bytes were written.
 */
static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
				 int size, const void *buf)
{
	struct unix_private_data *priv;
	errcode_t err = 0;
	ssize_t nwritten;

	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
	priv = (struct unix_private_data *) channel->private_data;
	EXT2_CHECK_MAGIC(priv, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);

	if (channel->align != 0) {
#ifdef ALIGN_DEBUG
		printf("unix_write_byte: O_DIRECT fallback\n");
#endif
		return EXT2_ET_UNIMPLEMENTED;
	}

#ifndef NO_IO_CACHE
	/* Drop every cached block before writing behind the cache's back. */
	err = flush_cached_blocks(channel, priv, FLUSH_INVALIDATE);
	if (err)
		return err;
#endif

	if (lseek(priv->dev, offset + priv->offset, SEEK_SET) < 0)
		return errno;

	nwritten = write(priv->dev, buf, size);
	if (nwritten < 0)
		return errno;
	if (nwritten != size)
		return EXT2_ET_SHORT_WRITE;

	return 0;
}
1248
1249
/*
1250
 * Flush data buffers to disk.
1251
 */
1252
static errcode_t unix_flush(io_channel channel)
1253
0
{
1254
0
  struct unix_private_data *data;
1255
0
  errcode_t retval = 0;
1256
1257
0
  EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1258
0
  data = (struct unix_private_data *) channel->private_data;
1259
0
  EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1260
1261
0
#ifndef NO_IO_CACHE
1262
0
  retval = flush_cached_blocks(channel, data, 0);
1263
0
#endif
1264
0
#ifdef HAVE_FSYNC
1265
0
  if (!retval && fsync(data->dev) != 0)
1266
0
    return errno;
1267
0
#endif
1268
0
  return retval;
1269
0
}
1270
1271
/*
 * Set a named option on the channel.
 *
 * Recognised options:
 *   "offset" - 'arg' is parsed with strtoull() (base auto-detected) and
 *              stored as the private data's byte offset.  Trailing
 *              garbage, or a value that becomes negative once converted
 *              to the signed offset type, is rejected and the previous
 *              offset is kept.
 *   "cache"  - 'arg' must be "on" or "off".  "on" clears IO_FLAG_NOCACHE;
 *              "off" flushes the cached blocks, sets IO_FLAG_NOCACHE and
 *              returns the flush result.
 *
 * Returns 0 on success and EXT2_ET_INVALID_ARGUMENT for an unknown
 * option, a missing 'arg', or an unacceptable value.
 */
static errcode_t unix_set_option(io_channel channel, const char *option,
				 const char *arg)
{
	struct unix_private_data *data;
	unsigned long long tmp;
	errcode_t retval;
	char *end;

	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
	data = (struct unix_private_data *) channel->private_data;
	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);

	if (!strcmp(option, "offset")) {
		ext2_loff_t new_offset;

		if (!arg)
			return EXT2_ET_INVALID_ARGUMENT;

		tmp = strtoull(arg, &end, 0);
		if (*end)
			return EXT2_ET_INVALID_ARGUMENT;
		/*
		 * Validate in a temporary so that a rejected value never
		 * reaches data->offset.
		 */
		new_offset = tmp;
		if (new_offset < 0)
			return EXT2_ET_INVALID_ARGUMENT;
		data->offset = new_offset;
		return 0;
	}
	if (!strcmp(option, "cache")) {
		if (!arg)
			return EXT2_ET_INVALID_ARGUMENT;
		if (!strcmp(arg, "on")) {
			data->flags &= ~IO_FLAG_NOCACHE;
			return 0;
		}
		if (!strcmp(arg, "off")) {
			/* The flag is set even when the flush reports an error. */
			retval = flush_cached_blocks(channel, data, 0);
			data->flags |= IO_FLAG_NOCACHE;
			return retval;
		}
		return EXT2_ET_INVALID_ARGUMENT;
	}
	return EXT2_ET_INVALID_ARGUMENT;
}
1311
1312
#if defined(__linux__) && !defined(BLKDISCARD)
1313
0
#define BLKDISCARD    _IO(0x12,119)
1314
#endif
1315
1316
/*
 * Discard 'count' blocks starting at 'block'.
 *
 * For a channel flagged CHANNEL_FLAGS_BLOCK_DEVICE the BLKDISCARD ioctl
 * is used when it is defined; otherwise fallocate() with
 * PUNCH_HOLE | KEEP_SIZE is used when available.  If the required
 * facility is not compiled in, or the call fails with EOPNOTSUPP,
 * EXT2_ET_UNIMPLEMENTED is returned; in the EOPNOTSUPP case
 * CHANNEL_FLAGS_NODISCARD is also set so later calls return immediately.
 *
 * Returns 0 on success, EXT2_ET_UNIMPLEMENTED as described above, or
 * errno for any other failure.
 */
static errcode_t unix_discard(io_channel channel, unsigned long long block,
			      unsigned long long count)
{
	struct unix_private_data *priv;
	int rc = EOPNOTSUPP;

	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
	priv = (struct unix_private_data *) channel->private_data;
	EXT2_CHECK_MAGIC(priv, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);

	/* A previous attempt already found discard unsupported. */
	if (channel->flags & CHANNEL_FLAGS_NODISCARD)
		return EXT2_ET_UNIMPLEMENTED;

	if (channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE) {
#ifdef BLKDISCARD
		/* range[0] is the starting byte, range[1] the byte length. */
		__u64 range[2];

		range[0] = (__u64)(block) * channel->block_size + priv->offset;
		range[1] = (__u64)(count) * channel->block_size;

		rc = ioctl(priv->dev, BLKDISCARD, &range);
#else
		return EXT2_ET_UNIMPLEMENTED;
#endif
	} else {
#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE)
		/*
		 * If we are not on block device, try to use punch hole
		 * to reclaim free space.
		 */
		rc = fallocate(priv->dev,
			       FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			       (off_t)(block) * channel->block_size + priv->offset,
			       (off_t)(count) * channel->block_size);
#else
		return EXT2_ET_UNIMPLEMENTED;
#endif
	}

	if (rc >= 0)
		return 0;
	if (errno != EOPNOTSUPP)
		return errno;

	/* Remember that discard is unsupported so we stop trying. */
	channel->flags |= CHANNEL_FLAGS_NODISCARD;
	return EXT2_ET_UNIMPLEMENTED;
}
1365
1366
/*
 * Zero a byte range of 'fd' using fallocate().
 *
 * If we know about ZERO_RANGE, try that before we try PUNCH_HOLE because
 * ZERO_RANGE doesn't unmap preallocated blocks.  We prefer fallocate
 * because it always invalidates page cache, and libext2fs requires that
 * reads after ZERO_RANGE return zeroes.
 *
 * Returns 0 as soon as one method succeeds.  Otherwise errno is set to
 * EOPNOTSUPP and the result of the last attempt is returned (-1 when no
 * method is compiled in).
 */
static int __unix_zeroout(int fd, off_t offset, off_t len)
{
	int rc = -1;

#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_ZERO_RANGE)
	rc = fallocate(fd, FALLOC_FL_ZERO_RANGE, offset, len);
	if (!rc)
		return 0;
#endif
#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
	rc = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		       offset, len);
	if (!rc)
		return 0;
#endif
	/* Whatever the attempts reported, tell the caller "unsupported". */
	errno = EOPNOTSUPP;
	return rc;
}
1390
1391
/* parameters might not be used if OS doesn't support zeroout */
1392
#if __GNUC_PREREQ (4, 6)
1393
#pragma GCC diagnostic push
1394
#pragma GCC diagnostic ignored "-Wunused-parameter"
1395
#endif
1396
/*
 * Zero 'count' blocks starting at 'block'.
 *
 * For a channel that is not flagged CHANNEL_FLAGS_BLOCK_DEVICE, a zero
 * count succeeds immediately and the file is first extended with
 * ftruncate() when the range ends past its current size.  The range is
 * then zeroed through __unix_zeroout() unless CHANNEL_FLAGS_NOZEROOUT is
 * set.
 *
 * Returns 0 on success, EXT2_ET_UNIMPLEMENTED when zeroout is flagged or
 * found (EOPNOTSUPP) to be unsupported -- the flag is set in the latter
 * case -- or errno for any other failure.
 */
static errcode_t unix_zeroout(io_channel channel, unsigned long long block,
			      unsigned long long count)
{
	struct unix_private_data *priv;
	int rc;

	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
	priv = (struct unix_private_data *) channel->private_data;
	EXT2_CHECK_MAGIC(priv, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);

	if (!(channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE)) {
		/* Regular file, try to use truncate/punch/zero. */
		struct stat st;
		unsigned long long range_end;

		if (count == 0)
			return 0;
		/*
		 * If we're trying to zero a range past the end of the file,
		 * extend the file size, then truncate everything.
		 */
		rc = fstat(priv->dev, &st);
		if (rc)
			goto err;
		range_end = (block + count) * channel->block_size + priv->offset;
		if ((unsigned long long) st.st_size < range_end) {
			rc = ftruncate(priv->dev, range_end);
			if (rc)
				goto err;
		}
	}

	/* A previous attempt already found zeroout unsupported. */
	if (channel->flags & CHANNEL_FLAGS_NOZEROOUT)
		return EXT2_ET_UNIMPLEMENTED;

	rc = __unix_zeroout(priv->dev,
			    (off_t)(block) * channel->block_size + priv->offset,
			    (off_t)(count) * channel->block_size);
err:
	if (rc >= 0)
		return 0;
	if (errno != EOPNOTSUPP)
		return errno;

	/* Remember that zeroout is unsupported so we stop trying. */
	channel->flags |= CHANNEL_FLAGS_NOZEROOUT;
	return EXT2_ET_UNIMPLEMENTED;
}
1446
#if __GNUC_PREREQ (4, 6)
1447
#pragma GCC diagnostic pop
1448
#endif
1449
1450
static struct struct_io_manager struct_unix_manager = {
1451
  .magic    = EXT2_ET_MAGIC_IO_MANAGER,
1452
  .name   = "Unix I/O Manager",
1453
  .open   = unix_open,
1454
  .close    = unix_close,
1455
  .set_blksize  = unix_set_blksize,
1456
  .read_blk = unix_read_blk,
1457
  .write_blk  = unix_write_blk,
1458
  .flush    = unix_flush,
1459
  .write_byte = unix_write_byte,
1460
  .set_option = unix_set_option,
1461
  .get_stats  = unix_get_stats,
1462
  .read_blk64 = unix_read_blk64,
1463
  .write_blk64  = unix_write_blk64,
1464
  .discard  = unix_discard,
1465
  .cache_readahead  = unix_cache_readahead,
1466
  .zeroout  = unix_zeroout,
1467
};
1468
1469
io_manager unix_io_manager = &struct_unix_manager;
1470
1471
static struct struct_io_manager struct_unixfd_manager = {
1472
  .magic    = EXT2_ET_MAGIC_IO_MANAGER,
1473
  .name   = "Unix fd I/O Manager",
1474
  .open   = unixfd_open,
1475
  .close    = unix_close,
1476
  .set_blksize  = unix_set_blksize,
1477
  .read_blk = unix_read_blk,
1478
  .write_blk  = unix_write_blk,
1479
  .flush    = unix_flush,
1480
  .write_byte = unix_write_byte,
1481
  .set_option = unix_set_option,
1482
  .get_stats  = unix_get_stats,
1483
  .read_blk64 = unix_read_blk64,
1484
  .write_blk64  = unix_write_blk64,
1485
  .discard  = unix_discard,
1486
  .cache_readahead  = unix_cache_readahead,
1487
  .zeroout  = unix_zeroout,
1488
};
1489
1490
io_manager unixfd_io_manager = &struct_unixfd_manager;