Coverage Report

Created: 2025-11-16 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/leveldb/util/env_posix.cc
Line
Count
Source
1
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file. See the AUTHORS file for names of contributors.
4
5
#include <dirent.h>
6
#include <fcntl.h>
7
#include <sys/mman.h>
8
#ifndef __Fuchsia__
9
#include <sys/resource.h>
10
#endif
11
#include <sys/stat.h>
12
#include <sys/time.h>
13
#include <sys/types.h>
14
#include <unistd.h>
15
16
#include <atomic>
17
#include <cerrno>
18
#include <cstddef>
19
#include <cstdint>
20
#include <cstdio>
21
#include <cstdlib>
22
#include <cstring>
23
#include <limits>
24
#include <queue>
25
#include <set>
26
#include <string>
27
#include <thread>
28
#include <type_traits>
29
#include <utility>
30
31
#include "leveldb/env.h"
32
#include "leveldb/slice.h"
33
#include "leveldb/status.h"
34
#include "port/port.h"
35
#include "port/thread_annotations.h"
36
#include "util/env_posix_test_helper.h"
37
#include "util/posix_logger.h"
38
39
namespace leveldb {
40
41
namespace {
42
43
// Set by EnvPosixTestHelper::SetReadOnlyFDLimit() and MaxOpenFiles().
44
int g_open_read_only_file_limit = -1;
45
46
// Up to 1000 mmap regions for 64-bit binaries; none for 32-bit.
47
constexpr const int kDefaultMmapLimit = (sizeof(void*) >= 8) ? 1000 : 0;
48
49
// Can be set using EnvPosixTestHelper::SetReadOnlyMMapLimit().
50
int g_mmap_limit = kDefaultMmapLimit;
51
52
// Common flags defined for all posix open operations
53
#if defined(HAVE_O_CLOEXEC)
54
constexpr const int kOpenBaseFlags = O_CLOEXEC;
55
#else
56
constexpr const int kOpenBaseFlags = 0;
57
#endif  // defined(HAVE_O_CLOEXEC)
58
59
constexpr const size_t kWritableFileBufferSize = 65536;
60
61
240
// Converts a POSIX error number into a Status.
//
// |context| becomes the first part of the Status message and the
// std::strerror() text for |error_number| becomes the second part. ENOENT is
// reported as Status::NotFound; every other error number is reported as
// Status::IOError.
Status PosixError(const std::string& context, int error_number) {
  const char* const description = std::strerror(error_number);
  if (error_number != ENOENT) {
    return Status::IOError(context, description);
  }
  return Status::NotFound(context, description);
}
68
69
// Helper class to limit resource usage to avoid exhaustion.
// Currently used to limit read-only file descriptors and mmap file usage
// so that we do not run out of file descriptors or virtual memory, or run into
// kernel performance problems for very large databases.
//
// The limiter is a single atomic counter of the resources still available.
// It is neither copyable nor assignable.
class Limiter {
 public:
  // Limit maximum number of resources to |max_acquires|, which must be >= 0.
  explicit Limiter(int max_acquires)
      :
#if !defined(NDEBUG)
        max_acquires_(max_acquires),
#endif  // !defined(NDEBUG)
        acquires_allowed_(max_acquires) {
    assert(max_acquires >= 0);
  }

  Limiter(const Limiter&) = delete;
  Limiter& operator=(const Limiter&) = delete;

  // If another resource is available, acquire it and return true.
  // Else return false.
  bool Acquire() {
    // Optimistically take a slot; the previous value tells us whether one was
    // actually available.
    const int old_acquires_allowed =
        acquires_allowed_.fetch_sub(1, std::memory_order_relaxed);
    if (old_acquires_allowed > 0) return true;

    // No slot was available, so undo the decrement performed above.
    const int pre_increment_acquires_allowed =
        acquires_allowed_.fetch_add(1, std::memory_order_relaxed);

    // Silence compiler warnings about unused variables when NDEBUG is defined.
    (void)pre_increment_acquires_allowed;
    // If the check below fails, Release() was called more times than
    // Acquire() succeeded.
    assert(pre_increment_acquires_allowed < max_acquires_);

    return false;
  }

  // Release a resource acquired by a previous call to Acquire() that returned
  // true.
  void Release() {
    const int old_acquires_allowed =
        acquires_allowed_.fetch_add(1, std::memory_order_relaxed);

    // Silence compiler warnings about unused variables when NDEBUG is defined.
    (void)old_acquires_allowed;
    // If the check below fails, Release() was called more times than
    // Acquire() succeeded.
    assert(old_acquires_allowed < max_acquires_);
  }

 private:
#if !defined(NDEBUG)
  // Catches an excessive number of Release() calls.
  const int max_acquires_;
#endif  // !defined(NDEBUG)

  // The number of available resources.
  //
  // This is a counter and is not tied to the invariants of any other class, so
  // it can be operated on safely using std::memory_order_relaxed.
  std::atomic<int> acquires_allowed_;
};
131
132
// Implements sequential read access in a file using read().
133
//
134
// Instances of this class are thread-friendly but not thread-safe, as required
135
// by the SequentialFile API.
136
class PosixSequentialFile final : public SequentialFile {
137
 public:
138
  PosixSequentialFile(std::string filename, int fd)
139
276
      : fd_(fd), filename_(std::move(filename)) {}
140
276
  ~PosixSequentialFile() override { close(fd_); }
141
142
1.14k
  Status Read(size_t n, Slice* result, char* scratch) override {
143
1.14k
    Status status;
144
1.14k
    while (true) {
145
1.14k
      ::ssize_t read_size = ::read(fd_, scratch, n);
146
1.14k
      if (read_size < 0) {  // Read error.
147
0
        if (errno == EINTR) {
148
0
          continue;  // Retry
149
0
        }
150
0
        status = PosixError(filename_, errno);
151
0
        break;
152
0
      }
153
1.14k
      *result = Slice(scratch, read_size);
154
1.14k
      break;
155
1.14k
    }
156
1.14k
    return status;
157
1.14k
  }
158
159
0
  Status Skip(uint64_t n) override {
160
0
    if (::lseek(fd_, n, SEEK_CUR) == static_cast<off_t>(-1)) {
161
0
      return PosixError(filename_, errno);
162
0
    }
163
0
    return Status::OK();
164
0
  }
165
166
 private:
167
  const int fd_;
168
  const std::string filename_;
169
};
170
171
// Implements random read access in a file using pread().
172
//
173
// Instances of this class are thread-safe, as required by the RandomAccessFile
174
// API. Instances are immutable and Read() only calls thread-safe library
175
// functions.
176
class PosixRandomAccessFile final : public RandomAccessFile {
177
 public:
178
  // The new instance takes ownership of |fd|. |fd_limiter| must outlive this
179
  // instance, and will be used to determine if .
180
  PosixRandomAccessFile(std::string filename, int fd, Limiter* fd_limiter)
181
0
      : has_permanent_fd_(fd_limiter->Acquire()),
182
0
        fd_(has_permanent_fd_ ? fd : -1),
183
0
        fd_limiter_(fd_limiter),
184
0
        filename_(std::move(filename)) {
185
0
    if (!has_permanent_fd_) {
186
0
      assert(fd_ == -1);
187
0
      ::close(fd);  // The file will be opened on every read.
188
0
    }
189
0
  }
190
191
0
  ~PosixRandomAccessFile() override {
192
0
    if (has_permanent_fd_) {
193
0
      assert(fd_ != -1);
194
0
      ::close(fd_);
195
0
      fd_limiter_->Release();
196
0
    }
197
0
  }
198
199
  Status Read(uint64_t offset, size_t n, Slice* result,
200
0
              char* scratch) const override {
201
0
    int fd = fd_;
202
0
    if (!has_permanent_fd_) {
203
0
      fd = ::open(filename_.c_str(), O_RDONLY | kOpenBaseFlags);
204
0
      if (fd < 0) {
205
0
        return PosixError(filename_, errno);
206
0
      }
207
0
    }
208
209
0
    assert(fd != -1);
210
211
0
    Status status;
212
0
    ssize_t read_size = ::pread(fd, scratch, n, static_cast<off_t>(offset));
213
0
    *result = Slice(scratch, (read_size < 0) ? 0 : read_size);
214
0
    if (read_size < 0) {
215
      // An error: return a non-ok status.
216
0
      status = PosixError(filename_, errno);
217
0
    }
218
0
    if (!has_permanent_fd_) {
219
      // Close the temporary file descriptor opened earlier.
220
0
      assert(fd != fd_);
221
0
      ::close(fd);
222
0
    }
223
0
    return status;
224
0
  }
225
226
 private:
227
  const bool has_permanent_fd_;  // If false, the file is opened on every read.
228
  const int fd_;                 // -1 if has_permanent_fd_ is false.
229
  Limiter* const fd_limiter_;
230
  const std::string filename_;
231
};
232
233
// Implements random read access in a file using mmap().
234
//
235
// Instances of this class are thread-safe, as required by the RandomAccessFile
236
// API. Instances are immutable and Read() only calls thread-safe library
237
// functions.
238
class PosixMmapReadableFile final : public RandomAccessFile {
239
 public:
240
  // mmap_base[0, length-1] points to the memory-mapped contents of the file. It
241
  // must be the result of a successful call to mmap(). This instances takes
242
  // over the ownership of the region.
243
  //
244
  // |mmap_limiter| must outlive this instance. The caller must have already
245
  // acquired the right to use one mmap region, which will be released when this
246
  // instance is destroyed.
247
  PosixMmapReadableFile(std::string filename, char* mmap_base, size_t length,
248
                        Limiter* mmap_limiter)
249
79
      : mmap_base_(mmap_base),
250
79
        length_(length),
251
79
        mmap_limiter_(mmap_limiter),
252
79
        filename_(std::move(filename)) {}
253
254
79
  ~PosixMmapReadableFile() override {
255
79
    ::munmap(static_cast<void*>(mmap_base_), length_);
256
79
    mmap_limiter_->Release();
257
79
  }
258
259
  Status Read(uint64_t offset, size_t n, Slice* result,
260
659
              char* scratch) const override {
261
659
    if (offset + n > length_) {
262
0
      *result = Slice();
263
0
      return PosixError(filename_, EINVAL);
264
0
    }
265
266
659
    *result = Slice(mmap_base_ + offset, n);
267
659
    return Status::OK();
268
659
  }
269
270
 private:
271
  char* const mmap_base_;
272
  const size_t length_;
273
  Limiter* const mmap_limiter_;
274
  const std::string filename_;
275
};
276
277
class PosixWritableFile final : public WritableFile {
278
 public:
279
  PosixWritableFile(std::string filename, int fd)
280
529
      : pos_(0),
281
529
        fd_(fd),
282
529
        is_manifest_(IsManifest(filename)),
283
529
        filename_(std::move(filename)),
284
529
        dirname_(Dirname(filename_)) {}
285
286
529
  ~PosixWritableFile() override {
287
529
    if (fd_ >= 0) {
288
      // Ignoring any potential errors
289
226
      Close();
290
226
    }
291
529
  }
292
293
9.84k
  Status Append(const Slice& data) override {
294
9.84k
    size_t write_size = data.size();
295
9.84k
    const char* write_data = data.data();
296
297
    // Fit as much as possible into buffer.
298
9.84k
    size_t copy_size = std::min(write_size, kWritableFileBufferSize - pos_);
299
9.84k
    std::memcpy(buf_ + pos_, write_data, copy_size);
300
9.84k
    write_data += copy_size;
301
9.84k
    write_size -= copy_size;
302
9.84k
    pos_ += copy_size;
303
9.84k
    if (write_size == 0) {
304
9.84k
      return Status::OK();
305
9.84k
    }
306
307
    // Can't fit in buffer, so need to do at least one write.
308
6
    Status status = FlushBuffer();
309
6
    if (!status.ok()) {
310
0
      return status;
311
0
    }
312
313
    // Small writes go to buffer, large writes are written directly.
314
6
    if (write_size < kWritableFileBufferSize) {
315
0
      std::memcpy(buf_, write_data, write_size);
316
0
      pos_ = write_size;
317
0
      return Status::OK();
318
0
    }
319
6
    return WriteUnbuffered(write_data, write_size);
320
6
  }
321
322
529
  Status Close() override {
323
529
    Status status = FlushBuffer();
324
529
    const int close_result = ::close(fd_);
325
529
    if (close_result < 0 && status.ok()) {
326
0
      status = PosixError(filename_, errno);
327
0
    }
328
529
    fd_ = -1;
329
529
    return status;
330
529
  }
331
332
4.73k
  Status Flush() override { return FlushBuffer(); }
333
334
424
  Status Sync() override {
335
    // Ensure new files referred to by the manifest are in the filesystem.
336
    //
337
    // This needs to happen before the manifest file is flushed to disk, to
338
    // avoid crashing in a state where the manifest refers to files that are not
339
    // yet on disk.
340
424
    Status status = SyncDirIfManifest();
341
424
    if (!status.ok()) {
342
0
      return status;
343
0
    }
344
345
424
    status = FlushBuffer();
346
424
    if (!status.ok()) {
347
0
      return status;
348
0
    }
349
350
424
    return SyncFd(fd_, filename_);
351
424
  }
352
353
 private:
354
5.69k
  Status FlushBuffer() {
355
5.69k
    Status status = WriteUnbuffered(buf_, pos_);
356
5.69k
    pos_ = 0;
357
5.69k
    return status;
358
5.69k
  }
359
360
5.69k
  Status WriteUnbuffered(const char* data, size_t size) {
361
10.6k
    while (size > 0) {
362
4.96k
      ssize_t write_result = ::write(fd_, data, size);
363
4.96k
      if (write_result < 0) {
364
0
        if (errno == EINTR) {
365
0
          continue;  // Retry
366
0
        }
367
0
        return PosixError(filename_, errno);
368
0
      }
369
4.96k
      data += write_result;
370
4.96k
      size -= write_result;
371
4.96k
    }
372
5.69k
    return Status::OK();
373
5.69k
  }
374
375
424
  Status SyncDirIfManifest() {
376
424
    Status status;
377
424
    if (!is_manifest_) {
378
219
      return status;
379
219
    }
380
381
205
    int fd = ::open(dirname_.c_str(), O_RDONLY | kOpenBaseFlags);
382
205
    if (fd < 0) {
383
0
      status = PosixError(dirname_, errno);
384
205
    } else {
385
205
      status = SyncFd(fd, dirname_);
386
205
      ::close(fd);
387
205
    }
388
205
    return status;
389
424
  }
390
391
  // Ensures that all the caches associated with the given file descriptor's
392
  // data are flushed all the way to durable media, and can withstand power
393
  // failures.
394
  //
395
  // The path argument is only used to populate the description string in the
396
  // returned Status if an error occurs.
397
629
  static Status SyncFd(int fd, const std::string& fd_path) {
398
#if HAVE_FULLFSYNC
399
    // On macOS and iOS, fsync() doesn't guarantee durability past power
400
    // failures. fcntl(F_FULLFSYNC) is required for that purpose. Some
401
    // filesystems don't support fcntl(F_FULLFSYNC), and require a fallback to
402
    // fsync().
403
    if (::fcntl(fd, F_FULLFSYNC) == 0) {
404
      return Status::OK();
405
    }
406
#endif  // HAVE_FULLFSYNC
407
408
629
#if HAVE_FDATASYNC
409
629
    bool sync_success = ::fdatasync(fd) == 0;
410
#else
411
    bool sync_success = ::fsync(fd) == 0;
412
#endif  // HAVE_FDATASYNC
413
414
629
    if (sync_success) {
415
629
      return Status::OK();
416
629
    }
417
0
    return PosixError(fd_path, errno);
418
629
  }
419
420
  // Returns the directory name in a path pointing to a file.
421
  //
422
  // Returns "." if the path does not contain any directory separator.
423
529
  static std::string Dirname(const std::string& filename) {
424
529
    std::string::size_type separator_pos = filename.rfind('/');
425
529
    if (separator_pos == std::string::npos) {
426
0
      return std::string(".");
427
0
    }
428
    // The filename component should not contain a path separator. If it does,
429
    // the splitting was done incorrectly.
430
529
    assert(filename.find('/', separator_pos + 1) == std::string::npos);
431
432
529
    return filename.substr(0, separator_pos);
433
529
  }
434
435
  // Extracts the file name from a path pointing to a file.
436
  //
437
  // The returned Slice points to |filename|'s data buffer, so it is only valid
438
  // while |filename| is alive and unchanged.
439
529
  static Slice Basename(const std::string& filename) {
440
529
    std::string::size_type separator_pos = filename.rfind('/');
441
529
    if (separator_pos == std::string::npos) {
442
0
      return Slice(filename);
443
0
    }
444
    // The filename component should not contain a path separator. If it does,
445
    // the splitting was done incorrectly.
446
529
    assert(filename.find('/', separator_pos + 1) == std::string::npos);
447
448
529
    return Slice(filename.data() + separator_pos + 1,
449
529
                 filename.length() - separator_pos - 1);
450
529
  }
451
452
  // True if the given file is a manifest file.
453
529
  static bool IsManifest(const std::string& filename) {
454
529
    return Basename(filename).starts_with("MANIFEST");
455
529
  }
456
457
  // buf_[0, pos_ - 1] contains data to be written to fd_.
458
  char buf_[kWritableFileBufferSize];
459
  size_t pos_;
460
  int fd_;
461
462
  const bool is_manifest_;  // True if the file's name starts with MANIFEST.
463
  const std::string filename_;
464
  const std::string dirname_;  // The directory of filename_.
465
};
466
467
226
// Applies (|lock| == true) or removes (|lock| == false) a write lock covering
// the whole file behind |fd| via fcntl(F_SETLK).
//
// errno is reset to 0 before the call. Returns the fcntl() result unchanged.
int LockOrUnlock(int fd, bool lock) {
  errno = 0;

  struct ::flock request = {};  // Every field starts out zeroed.
  if (lock) {
    request.l_type = F_WRLCK;
  } else {
    request.l_type = F_UNLCK;
  }
  request.l_whence = SEEK_SET;
  request.l_start = 0;
  request.l_len = 0;  // Lock/unlock entire file.

  return ::fcntl(fd, F_SETLK, &request);
}
477
478
// Instances are thread-safe because they are immutable.
479
class PosixFileLock : public FileLock {
480
 public:
481
  PosixFileLock(int fd, std::string filename)
482
113
      : fd_(fd), filename_(std::move(filename)) {}
483
484
226
  int fd() const { return fd_; }
485
113
  const std::string& filename() const { return filename_; }
486
487
 private:
488
  const int fd_;
489
  const std::string filename_;
490
};
491
492
// Tracks the files locked by PosixEnv::LockFile().
493
//
494
// We maintain a separate set instead of relying on fcntl(F_SETLK) because
495
// fcntl(F_SETLK) does not provide any protection against multiple uses from the
496
// same process.
497
//
498
// Instances are thread-safe because all member data is guarded by a mutex.
499
class PosixLockTable {
500
 public:
501
113
  bool Insert(const std::string& fname) LOCKS_EXCLUDED(mu_) {
502
113
    mu_.Lock();
503
113
    bool succeeded = locked_files_.insert(fname).second;
504
113
    mu_.Unlock();
505
113
    return succeeded;
506
113
  }
507
113
  void Remove(const std::string& fname) LOCKS_EXCLUDED(mu_) {
508
113
    mu_.Lock();
509
113
    locked_files_.erase(fname);
510
113
    mu_.Unlock();
511
113
  }
512
513
 private:
514
  port::Mutex mu_;
515
  std::set<std::string> locked_files_ GUARDED_BY(mu_);
516
};
517
518
class PosixEnv : public Env {
519
 public:
520
  PosixEnv();
521
0
  ~PosixEnv() override {
522
0
    static const char msg[] =
523
0
        "PosixEnv singleton destroyed. Unsupported behavior!\n";
524
0
    std::fwrite(msg, 1, sizeof(msg), stderr);
525
0
    std::abort();
526
0
  }
527
528
  Status NewSequentialFile(const std::string& filename,
529
276
                           SequentialFile** result) override {
530
276
    int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags);
531
276
    if (fd < 0) {
532
0
      *result = nullptr;
533
0
      return PosixError(filename, errno);
534
0
    }
535
536
276
    *result = new PosixSequentialFile(filename, fd);
537
276
    return Status::OK();
538
276
  }
539
540
  Status NewRandomAccessFile(const std::string& filename,
541
79
                             RandomAccessFile** result) override {
542
79
    *result = nullptr;
543
79
    int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags);
544
79
    if (fd < 0) {
545
0
      return PosixError(filename, errno);
546
0
    }
547
548
79
    if (!mmap_limiter_.Acquire()) {
549
0
      *result = new PosixRandomAccessFile(filename, fd, &fd_limiter_);
550
0
      return Status::OK();
551
0
    }
552
553
79
    uint64_t file_size;
554
79
    Status status = GetFileSize(filename, &file_size);
555
79
    if (status.ok()) {
556
79
      void* mmap_base =
557
79
          ::mmap(/*addr=*/nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0);
558
79
      if (mmap_base != MAP_FAILED) {
559
79
        *result = new PosixMmapReadableFile(filename,
560
79
                                            reinterpret_cast<char*>(mmap_base),
561
79
                                            file_size, &mmap_limiter_);
562
79
      } else {
563
0
        status = PosixError(filename, errno);
564
0
      }
565
79
    }
566
79
    ::close(fd);
567
79
    if (!status.ok()) {
568
0
      mmap_limiter_.Release();
569
0
    }
570
79
    return status;
571
79
  }
572
573
  Status NewWritableFile(const std::string& filename,
574
529
                         WritableFile** result) override {
575
529
    int fd = ::open(filename.c_str(),
576
529
                    O_TRUNC | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
577
529
    if (fd < 0) {
578
0
      *result = nullptr;
579
0
      return PosixError(filename, errno);
580
0
    }
581
582
529
    *result = new PosixWritableFile(filename, fd);
583
529
    return Status::OK();
584
529
  }
585
586
  Status NewAppendableFile(const std::string& filename,
587
0
                           WritableFile** result) override {
588
0
    int fd = ::open(filename.c_str(),
589
0
                    O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
590
0
    if (fd < 0) {
591
0
      *result = nullptr;
592
0
      return PosixError(filename, errno);
593
0
    }
594
595
0
    *result = new PosixWritableFile(filename, fd);
596
0
    return Status::OK();
597
0
  }
598
599
113
  bool FileExists(const std::string& filename) override {
600
113
    return ::access(filename.c_str(), F_OK) == 0;
601
113
  }
602
603
  Status GetChildren(const std::string& directory_path,
604
254
                     std::vector<std::string>* result) override {
605
254
    result->clear();
606
254
    ::DIR* dir = ::opendir(directory_path.c_str());
607
254
    if (dir == nullptr) {
608
0
      return PosixError(directory_path, errno);
609
0
    }
610
254
    struct ::dirent* entry;
611
2.51k
    while ((entry = ::readdir(dir)) != nullptr) {
612
2.25k
      result->emplace_back(entry->d_name);
613
2.25k
    }
614
254
    ::closedir(dir);
615
254
    return Status::OK();
616
254
  }
617
618
224
  Status RemoveFile(const std::string& filename) override {
619
224
    if (::unlink(filename.c_str()) != 0) {
620
14
      return PosixError(filename, errno);
621
14
    }
622
210
    return Status::OK();
623
224
  }
624
625
226
  Status CreateDir(const std::string& dirname) override {
626
226
    if (::mkdir(dirname.c_str(), 0755) != 0) {
627
162
      return PosixError(dirname, errno);
628
162
    }
629
64
    return Status::OK();
630
226
  }
631
632
0
  Status RemoveDir(const std::string& dirname) override {
633
0
    if (::rmdir(dirname.c_str()) != 0) {
634
0
      return PosixError(dirname, errno);
635
0
    }
636
0
    return Status::OK();
637
0
  }
638
639
79
  Status GetFileSize(const std::string& filename, uint64_t* size) override {
640
79
    struct ::stat file_stat;
641
79
    if (::stat(filename.c_str(), &file_stat) != 0) {
642
0
      *size = 0;
643
0
      return PosixError(filename, errno);
644
0
    }
645
79
    *size = file_stat.st_size;
646
79
    return Status::OK();
647
79
  }
648
649
290
  Status RenameFile(const std::string& from, const std::string& to) override {
650
290
    if (std::rename(from.c_str(), to.c_str()) != 0) {
651
64
      return PosixError(from, errno);
652
64
    }
653
226
    return Status::OK();
654
290
  }
655
656
113
  Status LockFile(const std::string& filename, FileLock** lock) override {
657
113
    *lock = nullptr;
658
659
113
    int fd = ::open(filename.c_str(), O_RDWR | O_CREAT | kOpenBaseFlags, 0644);
660
113
    if (fd < 0) {
661
0
      return PosixError(filename, errno);
662
0
    }
663
664
113
    if (!locks_.Insert(filename)) {
665
0
      ::close(fd);
666
0
      return Status::IOError("lock " + filename, "already held by process");
667
0
    }
668
669
113
    if (LockOrUnlock(fd, true) == -1) {
670
0
      int lock_errno = errno;
671
0
      ::close(fd);
672
0
      locks_.Remove(filename);
673
0
      return PosixError("lock " + filename, lock_errno);
674
0
    }
675
676
113
    *lock = new PosixFileLock(fd, filename);
677
113
    return Status::OK();
678
113
  }
679
680
113
  Status UnlockFile(FileLock* lock) override {
681
113
    PosixFileLock* posix_file_lock = static_cast<PosixFileLock*>(lock);
682
113
    if (LockOrUnlock(posix_file_lock->fd(), false) == -1) {
683
0
      return PosixError("unlock " + posix_file_lock->filename(), errno);
684
0
    }
685
113
    locks_.Remove(posix_file_lock->filename());
686
113
    ::close(posix_file_lock->fd());
687
113
    delete posix_file_lock;
688
113
    return Status::OK();
689
113
  }
690
691
  void Schedule(void (*background_work_function)(void* background_work_arg),
692
                void* background_work_arg) override;
693
694
  void StartThread(void (*thread_main)(void* thread_main_arg),
695
0
                   void* thread_main_arg) override {
696
0
    std::thread new_thread(thread_main, thread_main_arg);
697
0
    new_thread.detach();
698
0
  }
699
700
0
  Status GetTestDirectory(std::string* result) override {
701
0
    const char* env = std::getenv("TEST_TMPDIR");
702
0
    if (env && env[0] != '\0') {
703
0
      *result = env;
704
0
    } else {
705
0
      char buf[100];
706
0
      std::snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d",
707
0
                    static_cast<int>(::geteuid()));
708
0
      *result = buf;
709
0
    }
710
711
    // The CreateDir status is ignored because the directory may already exist.
712
0
    CreateDir(*result);
713
714
0
    return Status::OK();
715
0
  }
716
717
113
  Status NewLogger(const std::string& filename, Logger** result) override {
718
113
    int fd = ::open(filename.c_str(),
719
113
                    O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
720
113
    if (fd < 0) {
721
0
      *result = nullptr;
722
0
      return PosixError(filename, errno);
723
0
    }
724
725
113
    std::FILE* fp = ::fdopen(fd, "w");
726
113
    if (fp == nullptr) {
727
0
      ::close(fd);
728
0
      *result = nullptr;
729
0
      return PosixError(filename, errno);
730
113
    } else {
731
113
      *result = new PosixLogger(fp);
732
113
      return Status::OK();
733
113
    }
734
113
  }
735
736
112
  uint64_t NowMicros() override {
737
112
    static constexpr uint64_t kUsecondsPerSecond = 1000000;
738
112
    struct ::timeval tv;
739
112
    ::gettimeofday(&tv, nullptr);
740
112
    return static_cast<uint64_t>(tv.tv_sec) * kUsecondsPerSecond + tv.tv_usec;
741
112
  }
742
743
0
  void SleepForMicroseconds(int micros) override {
744
0
    std::this_thread::sleep_for(std::chrono::microseconds(micros));
745
0
  }
746
747
 private:
748
  void BackgroundThreadMain();
749
750
1
  static void BackgroundThreadEntryPoint(PosixEnv* env) {
751
1
    env->BackgroundThreadMain();
752
1
  }
753
754
  // Stores the work item data in a Schedule() call.
755
  //
756
  // Instances are constructed on the thread calling Schedule() and used on the
757
  // background thread.
758
  //
759
  // This structure is thread-safe because it is immutable.
760
  struct BackgroundWorkItem {
761
    explicit BackgroundWorkItem(void (*function)(void* arg), void* arg)
762
48
        : function(function), arg(arg) {}
763
764
    void (*const function)(void*);
765
    void* const arg;
766
  };
767
768
  port::Mutex background_work_mutex_;
769
  port::CondVar background_work_cv_ GUARDED_BY(background_work_mutex_);
770
  bool started_background_thread_ GUARDED_BY(background_work_mutex_);
771
772
  std::queue<BackgroundWorkItem> background_work_queue_
773
      GUARDED_BY(background_work_mutex_);
774
775
  PosixLockTable locks_;  // Thread-safe.
776
  Limiter mmap_limiter_;  // Thread-safe.
777
  Limiter fd_limiter_;    // Thread-safe.
778
};
779
780
// Return the maximum number of concurrent mmaps.
781
1
int MaxMmaps() { return g_mmap_limit; }
782
783
// Return the maximum number of read-only files to keep open.
784
1
int MaxOpenFiles() {
785
1
  if (g_open_read_only_file_limit >= 0) {
786
0
    return g_open_read_only_file_limit;
787
0
  }
788
#ifdef __Fuchsia__
789
  // Fuchsia doesn't implement getrlimit.
790
  g_open_read_only_file_limit = 50;
791
#else
792
1
  struct ::rlimit rlim;
793
1
  if (::getrlimit(RLIMIT_NOFILE, &rlim)) {
794
    // getrlimit failed, fallback to hard-coded default.
795
0
    g_open_read_only_file_limit = 50;
796
1
  } else if (rlim.rlim_cur == RLIM_INFINITY) {
797
0
    g_open_read_only_file_limit = std::numeric_limits<int>::max();
798
1
  } else {
799
    // Allow use of 20% of available file descriptors for read-only files.
800
1
    g_open_read_only_file_limit = rlim.rlim_cur / 5;
801
1
  }
802
1
#endif
803
1
  return g_open_read_only_file_limit;
804
1
}
805
806
}  // namespace
807
808
PosixEnv::PosixEnv()
809
1
    : background_work_cv_(&background_work_mutex_),
810
1
      started_background_thread_(false),
811
1
      mmap_limiter_(MaxMmaps()),
812
1
      fd_limiter_(MaxOpenFiles()) {}
813
814
void PosixEnv::Schedule(
815
    void (*background_work_function)(void* background_work_arg),
816
48
    void* background_work_arg) {
817
48
  background_work_mutex_.Lock();
818
819
  // Start the background thread, if we haven't done so already.
820
48
  if (!started_background_thread_) {
821
1
    started_background_thread_ = true;
822
1
    std::thread background_thread(PosixEnv::BackgroundThreadEntryPoint, this);
823
1
    background_thread.detach();
824
1
  }
825
826
  // If the queue is empty, the background thread may be waiting for work.
827
48
  if (background_work_queue_.empty()) {
828
48
    background_work_cv_.Signal();
829
48
  }
830
831
48
  background_work_queue_.emplace(background_work_function, background_work_arg);
832
48
  background_work_mutex_.Unlock();
833
48
}
834
835
1
void PosixEnv::BackgroundThreadMain() {
836
50
  while (true) {
837
49
    background_work_mutex_.Lock();
838
839
    // Wait until there is work to be done.
840
93
    while (background_work_queue_.empty()) {
841
44
      background_work_cv_.Wait();
842
44
    }
843
844
49
    assert(!background_work_queue_.empty());
845
49
    auto background_work_function = background_work_queue_.front().function;
846
49
    void* background_work_arg = background_work_queue_.front().arg;
847
49
    background_work_queue_.pop();
848
849
49
    background_work_mutex_.Unlock();
850
49
    background_work_function(background_work_arg);
851
49
  }
852
1
}
853
854
namespace {
855
856
// Wraps an Env instance whose destructor is never created.
857
//
858
// Intended usage:
859
//   using PlatformSingletonEnv = SingletonEnv<PlatformEnv>;
860
//   void ConfigurePosixEnv(int param) {
861
//     PlatformSingletonEnv::AssertEnvNotInitialized();
862
//     // set global configuration flags.
863
//   }
864
//   Env* Env::Default() {
865
//     static PlatformSingletonEnv default_env;
866
//     return default_env.env();
867
//   }
868
template <typename EnvType>
869
class SingletonEnv {
870
 public:
871
1
  SingletonEnv() {
872
#if !defined(NDEBUG)
873
    env_initialized_.store(true, std::memory_order_relaxed);
874
#endif  // !defined(NDEBUG)
875
1
    static_assert(sizeof(env_storage_) >= sizeof(EnvType),
876
1
                  "env_storage_ will not fit the Env");
877
1
    static_assert(std::is_standard_layout_v<SingletonEnv<EnvType>>);
878
1
    static_assert(
879
1
        offsetof(SingletonEnv<EnvType>, env_storage_) % alignof(EnvType) == 0,
880
1
        "env_storage_ does not meet the Env's alignment needs");
881
1
    static_assert(alignof(SingletonEnv<EnvType>) % alignof(EnvType) == 0,
882
1
                  "env_storage_ does not meet the Env's alignment needs");
883
1
    new (env_storage_) EnvType();
884
1
  }
885
  ~SingletonEnv() = default;
886
887
  SingletonEnv(const SingletonEnv&) = delete;
888
  SingletonEnv& operator=(const SingletonEnv&) = delete;
889
890
192
  Env* env() { return reinterpret_cast<Env*>(&env_storage_); }
891
892
0
  static void AssertEnvNotInitialized() {
893
#if !defined(NDEBUG)
894
    assert(!env_initialized_.load(std::memory_order_relaxed));
895
#endif  // !defined(NDEBUG)
896
0
  }
897
898
 private:
899
  alignas(EnvType) char env_storage_[sizeof(EnvType)];
900
#if !defined(NDEBUG)
901
  static std::atomic<bool> env_initialized_;
902
#endif  // !defined(NDEBUG)
903
};
904
905
#if !defined(NDEBUG)
906
template <typename EnvType>
907
std::atomic<bool> SingletonEnv<EnvType>::env_initialized_;
908
#endif  // !defined(NDEBUG)
909
910
using PosixDefaultEnv = SingletonEnv<PosixEnv>;
911
912
}  // namespace
913
914
0
void EnvPosixTestHelper::SetReadOnlyFDLimit(int limit) {
915
0
  PosixDefaultEnv::AssertEnvNotInitialized();
916
0
  g_open_read_only_file_limit = limit;
917
0
}
918
919
0
void EnvPosixTestHelper::SetReadOnlyMMapLimit(int limit) {
920
0
  PosixDefaultEnv::AssertEnvNotInitialized();
921
0
  g_mmap_limit = limit;
922
0
}
923
924
192
// Returns the process-wide default Env.
//
// The Env lives inside a function-local static PosixDefaultEnv, so it is
// constructed on the first call and the same pointer is returned on every
// call.
Env* Env::Default() {
  static PosixDefaultEnv default_env_holder;
  Env* const default_env = default_env_holder.env();
  return default_env;
}
928
929
}  // namespace leveldb