LCOV - code coverage report
Current view: top level - pebble/vfs - default_linux.go (source / functions) Hit Total Coverage
Test: 2024-09-13 08:16Z 0665a3e1 - tests + meta.lcov Lines: 51 86 59.3 %
Date: 2024-09-13 08:17:21 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : //go:build linux
       6             : // +build linux
       7             : 
       8             : package vfs
       9             : 
      10             : import (
      11             :         "io/fs"
      12             :         "os"
      13             :         "syscall"
      14             : 
      15             :         "github.com/cockroachdb/errors"
      16             :         "golang.org/x/sys/unix"
      17             : )
      18             : 
      19           2 : func wrapOSFileImpl(f *os.File) File {
      20           2 :         lf := &linuxFile{File: f, fd: f.Fd()}
      21           2 :         if lf.fd != InvalidFd {
      22           2 :                 lf.useSyncRange = isSyncRangeSupported(lf.fd)
      23           2 :         }
      24           2 :         return lf
      25             : }
      26             : 
      27           2 : func (defaultFS) OpenDir(name string) (File, error) {
      28           2 :         f, err := os.OpenFile(name, syscall.O_CLOEXEC, 0)
      29           2 :         if err != nil {
      30           0 :                 return nil, errors.WithStack(err)
      31           0 :         }
      32           2 :         return &linuxDir{f}, nil
      33             : }
      34             : 
      35             : // Assert that linuxFile and linuxDir implement vfs.File.
      36             : var (
      37             :         _ File = (*linuxDir)(nil)
      38             :         _ File = (*linuxFile)(nil)
      39             : )
      40             : 
      41             : type linuxDir struct {
      42             :         *os.File
      43             : }
      44             : 
      45           0 : func (d *linuxDir) Prefetch(offset int64, length int64) error      { return nil }
      46           0 : func (d *linuxDir) Preallocate(offset, length int64) error         { return nil }
      47           0 : func (d *linuxDir) Stat() (FileInfo, error)                        { return maybeWrapFileInfo(d.File.Stat()) }
      48           0 : func (d *linuxDir) SyncData() error                                { return d.Sync() }
      49           0 : func (d *linuxDir) SyncTo(offset int64) (fullSync bool, err error) { return false, nil }
      50             : 
      51             : type linuxFile struct {
      52             :         *os.File
      53             :         fd           uintptr
      54             :         useSyncRange bool
      55             : }
      56             : 
      57           1 : func (f *linuxFile) Prefetch(offset int64, length int64) error {
      58           1 :         _, _, err := unix.Syscall(unix.SYS_READAHEAD, uintptr(f.fd), uintptr(offset), uintptr(length))
      59           1 :         return err
      60           1 : }
      61             : 
      62           2 : func (f *linuxFile) Preallocate(offset, length int64) error {
      63           2 :         return unix.Fallocate(int(f.fd), unix.FALLOC_FL_KEEP_SIZE, offset, length)
      64           2 : }
      65             : 
      66           2 : func (f *linuxFile) Stat() (FileInfo, error) {
      67           2 :         fi, err := f.File.Stat()
      68           2 :         if err != nil {
      69           0 :                 return nil, err
      70           0 :         }
      71           2 :         return defaultFileInfo{fi}, nil
      72             : }
      73             : 
      74           2 : func (f *linuxFile) SyncData() error {
      75           2 :         return unix.Fdatasync(int(f.fd))
      76           2 : }
      77             : 
      78           1 : func (f *linuxFile) SyncTo(offset int64) (fullSync bool, err error) {
      79           1 :         if !f.useSyncRange {
      80           0 :                 // Use fdatasync, which does provide persistence guarantees but won't
      81           0 :                 // update all file metadata. From the `fdatasync` man page:
      82           0 :                 //
      83           0 :                 // fdatasync() is similar to fsync(), but does not flush modified
      84           0 :                 // metadata unless that metadata is needed in order to allow a
      85           0 :                 // subsequent data retrieval to be correctly handled. For example,
      86           0 :                 // changes to st_atime or st_mtime (respectively, time of last access
      87           0 :                 // and time of last modification; see stat(2)) do not require flushing
      88           0 :                 // because they are not necessary for a subsequent data read to be
      89           0 :                 // handled correctly. On the other hand, a change to the file size
      90           0 :                 // (st_size, as made by say ftruncate(2)), would require a metadata
      91           0 :                 // flush.
      92           0 :                 if err = unix.Fdatasync(int(f.fd)); err != nil {
      93           0 :                         return false, err
      94           0 :                 }
      95           0 :                 return true, nil
      96             :         }
      97             : 
      98           1 :         const (
      99           1 :                 waitBefore = 0x1
     100           1 :                 write      = 0x2
     101           1 :                 // waitAfter = 0x4
     102           1 :         )
     103           1 : 
     104           1 :         // By specifying write|waitBefore for the flags, we're instructing
     105           1 :         // SyncFileRange to a) wait for any outstanding data being written to finish,
     106           1 :         // and b) to queue any other dirty data blocks in the range [0,offset] for
     107           1 :         // writing. The actual writing of this data will occur asynchronously. The
     108           1 :         // use of `waitBefore` is to limit how much dirty data is allowed to
     109           1 :         // accumulate. Linux sometimes behaves poorly when a large amount of dirty
     110           1 :         // data accumulates, impacting other I/O operations.
     111           1 :         return false, unix.SyncFileRange(int(f.fd), 0, offset, write|waitBefore)
     112             : }
     113             : 
     114             : type syncFileRange func(fd int, off int64, n int64, flags int) (err error)
     115             : 
     116             : // sync_file_range depends on both the filesystem, and the broader kernel
     117             : // support. In particular, Windows Subsystem for Linux does not support
     118             : // sync_file_range, even when used with ext{2,3,4}. syncRangeSmokeTest performs
     119             : // a test of sync_file_range, returning false on ENOSYS, and true otherwise.
     120           2 : func syncRangeSmokeTest(fd uintptr, syncFn syncFileRange) bool {
     121           2 :         err := syncFn(int(fd), 0 /* offset */, 0 /* nbytes */, 0 /* flags */)
     122           2 :         return err != unix.ENOSYS
     123           2 : }
     124             : 
     125           2 : func isSyncRangeSupported(fd uintptr) bool {
     126           2 :         var stat unix.Statfs_t
     127           2 :         if err := unix.Fstatfs(int(fd), &stat); err != nil {
     128           0 :                 return false
     129           0 :         }
     130             : 
     131             :         // Allowlist which filesystems we allow using sync_file_range with as some
     132             :         // filesystems treat that syscall as a noop (notably ZFS). An allowlist is
     133             :         // used instead of a denylist in order to have a more graceful failure mode
     134             :         // in case a filesystem we haven't tested is encountered. Currently only
     135             :         // ext2/3/4 are known to work properly.
     136           2 :         const extMagic = 0xef53
     137           2 :         switch stat.Type {
     138           2 :         case extMagic:
     139           2 :                 return syncRangeSmokeTest(fd, unix.SyncFileRange)
     140             :         }
     141           0 :         return false
     142             : }
     143             : 
     144           0 : func deviceIDFromFileInfo(finfo fs.FileInfo) DeviceID {
     145           0 :         statInfo := finfo.Sys().(*syscall.Stat_t)
     146           0 :         return DeviceID{
     147           0 :                 major: unix.Major(uint64(statInfo.Dev)),
     148           0 :                 minor: unix.Minor(uint64(statInfo.Dev)),
     149           0 :         }
     150           0 : }

Generated by: LCOV version 1.14