/src/leveldb/util/env_posix.cc
Line | Count | Source |
1 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
2 | | // Use of this source code is governed by a BSD-style license that can be |
3 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | | |
5 | | #include <dirent.h> |
6 | | #include <fcntl.h> |
7 | | #include <sys/mman.h> |
8 | | #ifndef __Fuchsia__ |
9 | | #include <sys/resource.h> |
10 | | #endif |
11 | | #include <sys/stat.h> |
12 | | #include <sys/time.h> |
13 | | #include <sys/types.h> |
14 | | #include <unistd.h> |
15 | | |
16 | | #include <atomic> |
17 | | #include <cerrno> |
18 | | #include <cstddef> |
19 | | #include <cstdint> |
20 | | #include <cstdio> |
21 | | #include <cstdlib> |
22 | | #include <cstring> |
23 | | #include <limits> |
24 | | #include <queue> |
25 | | #include <set> |
26 | | #include <string> |
27 | | #include <thread> |
28 | | #include <type_traits> |
29 | | #include <utility> |
30 | | |
31 | | #include "leveldb/env.h" |
32 | | #include "leveldb/slice.h" |
33 | | #include "leveldb/status.h" |
34 | | #include "port/port.h" |
35 | | #include "port/thread_annotations.h" |
36 | | #include "util/env_posix_test_helper.h" |
37 | | #include "util/posix_logger.h" |
38 | | |
39 | | namespace leveldb { |
40 | | |
41 | | namespace { |
42 | | |
// Cap on the number of read-only file descriptors kept open. A negative value
// means the cap has not been determined yet. Set by
// EnvPosixTestHelper::SetReadOnlyFDLimit() and, lazily, by MaxOpenFiles().
int g_open_read_only_file_limit = -1;

// Default mmap budget: 1000 regions when pointers are at least 8 bytes wide
// (64-bit binaries), none otherwise.
constexpr int kDefaultMmapLimit = (sizeof(void*) >= 8) ? 1000 : 0;

// Active mmap budget. Can be changed using
// EnvPosixTestHelper::SetReadOnlyMMapLimit().
int g_mmap_limit = kDefaultMmapLimit;

// Flags OR-ed into every open() call made in this file.
#if defined(HAVE_O_CLOEXEC)
constexpr int kOpenBaseFlags = O_CLOEXEC;
#else
constexpr int kOpenBaseFlags = 0;
#endif  // defined(HAVE_O_CLOEXEC)

// Size, in bytes, of the write buffer embedded in each PosixWritableFile.
constexpr size_t kWritableFileBufferSize = 65536;
60 | | |
61 | 240 | Status PosixError(const std::string& context, int error_number) { |
62 | 240 | if (error_number == ENOENT) { |
63 | 78 | return Status::NotFound(context, std::strerror(error_number)); |
64 | 162 | } else { |
65 | 162 | return Status::IOError(context, std::strerror(error_number)); |
66 | 162 | } |
67 | 240 | } |
68 | | |
// Helper class to limit resource usage to avoid exhaustion.
// Currently used to limit read-only file descriptors and mmap file usage
// so that we do not run out of file descriptors or virtual memory, or run into
// kernel performance problems for very large databases.
class Limiter {
 public:
  // Limit maximum number of resources to |max_acquires|, which must be
  // non-negative.
  explicit Limiter(int max_acquires)
      :
#if !defined(NDEBUG)
        max_acquires_(max_acquires),
#endif  // !defined(NDEBUG)
        acquires_allowed_(max_acquires) {
    assert(max_acquires >= 0);
  }

  Limiter(const Limiter&) = delete;
  Limiter& operator=(const Limiter&) = delete;

  // If another resource is available, acquire it and return true.
  // Else return false.
  bool Acquire() {
    // Optimistically take a slot; the previous value tells us whether one was
    // actually available.
    int old_acquires_allowed =
        acquires_allowed_.fetch_sub(1, std::memory_order_relaxed);

    if (old_acquires_allowed > 0) return true;

    // No slot was available: undo the decrement performed above.
    int pre_increment_acquires_allowed =
        acquires_allowed_.fetch_add(1, std::memory_order_relaxed);

    // Silence compiler warnings about unused arguments when NDEBUG is defined.
    (void)pre_increment_acquires_allowed;
    // If the check below fails, Release() was called more times than acquire.
    assert(pre_increment_acquires_allowed < max_acquires_);

    return false;
  }

  // Release a resource acquired by a previous call to Acquire() that returned
  // true.
  void Release() {
    int old_acquires_allowed =
        acquires_allowed_.fetch_add(1, std::memory_order_relaxed);

    // Silence compiler warnings about unused arguments when NDEBUG is defined.
    (void)old_acquires_allowed;
    // If the check below fails, Release() was called more times than acquire.
    assert(old_acquires_allowed < max_acquires_);
  }

 private:
#if !defined(NDEBUG)
  // Catches an excessive number of Release() calls.
  const int max_acquires_;
#endif  // !defined(NDEBUG)

  // The number of available resources.
  //
  // This is a counter and is not tied to the invariants of any other class, so
  // it can be operated on safely using std::memory_order_relaxed.
  std::atomic<int> acquires_allowed_;
};
131 | | |
132 | | // Implements sequential read access in a file using read(). |
133 | | // |
134 | | // Instances of this class are thread-friendly but not thread-safe, as required |
135 | | // by the SequentialFile API. |
136 | | class PosixSequentialFile final : public SequentialFile { |
137 | | public: |
138 | | PosixSequentialFile(std::string filename, int fd) |
139 | 276 | : fd_(fd), filename_(std::move(filename)) {} |
140 | 276 | ~PosixSequentialFile() override { close(fd_); } |
141 | | |
142 | 1.14k | Status Read(size_t n, Slice* result, char* scratch) override { |
143 | 1.14k | Status status; |
144 | 1.14k | while (true) { |
145 | 1.14k | ::ssize_t read_size = ::read(fd_, scratch, n); |
146 | 1.14k | if (read_size < 0) { // Read error. |
147 | 0 | if (errno == EINTR) { |
148 | 0 | continue; // Retry |
149 | 0 | } |
150 | 0 | status = PosixError(filename_, errno); |
151 | 0 | break; |
152 | 0 | } |
153 | 1.14k | *result = Slice(scratch, read_size); |
154 | 1.14k | break; |
155 | 1.14k | } |
156 | 1.14k | return status; |
157 | 1.14k | } |
158 | | |
159 | 0 | Status Skip(uint64_t n) override { |
160 | 0 | if (::lseek(fd_, n, SEEK_CUR) == static_cast<off_t>(-1)) { |
161 | 0 | return PosixError(filename_, errno); |
162 | 0 | } |
163 | 0 | return Status::OK(); |
164 | 0 | } |
165 | | |
166 | | private: |
167 | | const int fd_; |
168 | | const std::string filename_; |
169 | | }; |
170 | | |
171 | | // Implements random read access in a file using pread(). |
172 | | // |
173 | | // Instances of this class are thread-safe, as required by the RandomAccessFile |
174 | | // API. Instances are immutable and Read() only calls thread-safe library |
175 | | // functions. |
176 | | class PosixRandomAccessFile final : public RandomAccessFile { |
177 | | public: |
178 | | // The new instance takes ownership of |fd|. |fd_limiter| must outlive this |
179 | | // instance, and will be used to determine if . |
180 | | PosixRandomAccessFile(std::string filename, int fd, Limiter* fd_limiter) |
181 | 0 | : has_permanent_fd_(fd_limiter->Acquire()), |
182 | 0 | fd_(has_permanent_fd_ ? fd : -1), |
183 | 0 | fd_limiter_(fd_limiter), |
184 | 0 | filename_(std::move(filename)) { |
185 | 0 | if (!has_permanent_fd_) { |
186 | 0 | assert(fd_ == -1); |
187 | 0 | ::close(fd); // The file will be opened on every read. |
188 | 0 | } |
189 | 0 | } |
190 | | |
191 | 0 | ~PosixRandomAccessFile() override { |
192 | 0 | if (has_permanent_fd_) { |
193 | 0 | assert(fd_ != -1); |
194 | 0 | ::close(fd_); |
195 | 0 | fd_limiter_->Release(); |
196 | 0 | } |
197 | 0 | } |
198 | | |
199 | | Status Read(uint64_t offset, size_t n, Slice* result, |
200 | 0 | char* scratch) const override { |
201 | 0 | int fd = fd_; |
202 | 0 | if (!has_permanent_fd_) { |
203 | 0 | fd = ::open(filename_.c_str(), O_RDONLY | kOpenBaseFlags); |
204 | 0 | if (fd < 0) { |
205 | 0 | return PosixError(filename_, errno); |
206 | 0 | } |
207 | 0 | } |
208 | | |
209 | 0 | assert(fd != -1); |
210 | |
|
211 | 0 | Status status; |
212 | 0 | ssize_t read_size = ::pread(fd, scratch, n, static_cast<off_t>(offset)); |
213 | 0 | *result = Slice(scratch, (read_size < 0) ? 0 : read_size); |
214 | 0 | if (read_size < 0) { |
215 | | // An error: return a non-ok status. |
216 | 0 | status = PosixError(filename_, errno); |
217 | 0 | } |
218 | 0 | if (!has_permanent_fd_) { |
219 | | // Close the temporary file descriptor opened earlier. |
220 | 0 | assert(fd != fd_); |
221 | 0 | ::close(fd); |
222 | 0 | } |
223 | 0 | return status; |
224 | 0 | } |
225 | | |
226 | | private: |
227 | | const bool has_permanent_fd_; // If false, the file is opened on every read. |
228 | | const int fd_; // -1 if has_permanent_fd_ is false. |
229 | | Limiter* const fd_limiter_; |
230 | | const std::string filename_; |
231 | | }; |
232 | | |
233 | | // Implements random read access in a file using mmap(). |
234 | | // |
235 | | // Instances of this class are thread-safe, as required by the RandomAccessFile |
236 | | // API. Instances are immutable and Read() only calls thread-safe library |
237 | | // functions. |
238 | | class PosixMmapReadableFile final : public RandomAccessFile { |
239 | | public: |
240 | | // mmap_base[0, length-1] points to the memory-mapped contents of the file. It |
241 | | // must be the result of a successful call to mmap(). This instances takes |
242 | | // over the ownership of the region. |
243 | | // |
244 | | // |mmap_limiter| must outlive this instance. The caller must have already |
245 | | // acquired the right to use one mmap region, which will be released when this |
246 | | // instance is destroyed. |
247 | | PosixMmapReadableFile(std::string filename, char* mmap_base, size_t length, |
248 | | Limiter* mmap_limiter) |
249 | 79 | : mmap_base_(mmap_base), |
250 | 79 | length_(length), |
251 | 79 | mmap_limiter_(mmap_limiter), |
252 | 79 | filename_(std::move(filename)) {} |
253 | | |
254 | 79 | ~PosixMmapReadableFile() override { |
255 | 79 | ::munmap(static_cast<void*>(mmap_base_), length_); |
256 | 79 | mmap_limiter_->Release(); |
257 | 79 | } |
258 | | |
259 | | Status Read(uint64_t offset, size_t n, Slice* result, |
260 | 659 | char* scratch) const override { |
261 | 659 | if (offset + n > length_) { |
262 | 0 | *result = Slice(); |
263 | 0 | return PosixError(filename_, EINVAL); |
264 | 0 | } |
265 | | |
266 | 659 | *result = Slice(mmap_base_ + offset, n); |
267 | 659 | return Status::OK(); |
268 | 659 | } |
269 | | |
270 | | private: |
271 | | char* const mmap_base_; |
272 | | const size_t length_; |
273 | | Limiter* const mmap_limiter_; |
274 | | const std::string filename_; |
275 | | }; |
276 | | |
277 | | class PosixWritableFile final : public WritableFile { |
278 | | public: |
279 | | PosixWritableFile(std::string filename, int fd) |
280 | 529 | : pos_(0), |
281 | 529 | fd_(fd), |
282 | 529 | is_manifest_(IsManifest(filename)), |
283 | 529 | filename_(std::move(filename)), |
284 | 529 | dirname_(Dirname(filename_)) {} |
285 | | |
286 | 529 | ~PosixWritableFile() override { |
287 | 529 | if (fd_ >= 0) { |
288 | | // Ignoring any potential errors |
289 | 226 | Close(); |
290 | 226 | } |
291 | 529 | } |
292 | | |
293 | 9.84k | Status Append(const Slice& data) override { |
294 | 9.84k | size_t write_size = data.size(); |
295 | 9.84k | const char* write_data = data.data(); |
296 | | |
297 | | // Fit as much as possible into buffer. |
298 | 9.84k | size_t copy_size = std::min(write_size, kWritableFileBufferSize - pos_); |
299 | 9.84k | std::memcpy(buf_ + pos_, write_data, copy_size); |
300 | 9.84k | write_data += copy_size; |
301 | 9.84k | write_size -= copy_size; |
302 | 9.84k | pos_ += copy_size; |
303 | 9.84k | if (write_size == 0) { |
304 | 9.84k | return Status::OK(); |
305 | 9.84k | } |
306 | | |
307 | | // Can't fit in buffer, so need to do at least one write. |
308 | 6 | Status status = FlushBuffer(); |
309 | 6 | if (!status.ok()) { |
310 | 0 | return status; |
311 | 0 | } |
312 | | |
313 | | // Small writes go to buffer, large writes are written directly. |
314 | 6 | if (write_size < kWritableFileBufferSize) { |
315 | 0 | std::memcpy(buf_, write_data, write_size); |
316 | 0 | pos_ = write_size; |
317 | 0 | return Status::OK(); |
318 | 0 | } |
319 | 6 | return WriteUnbuffered(write_data, write_size); |
320 | 6 | } |
321 | | |
322 | 529 | Status Close() override { |
323 | 529 | Status status = FlushBuffer(); |
324 | 529 | const int close_result = ::close(fd_); |
325 | 529 | if (close_result < 0 && status.ok()) { |
326 | 0 | status = PosixError(filename_, errno); |
327 | 0 | } |
328 | 529 | fd_ = -1; |
329 | 529 | return status; |
330 | 529 | } |
331 | | |
332 | 4.73k | Status Flush() override { return FlushBuffer(); } |
333 | | |
334 | 424 | Status Sync() override { |
335 | | // Ensure new files referred to by the manifest are in the filesystem. |
336 | | // |
337 | | // This needs to happen before the manifest file is flushed to disk, to |
338 | | // avoid crashing in a state where the manifest refers to files that are not |
339 | | // yet on disk. |
340 | 424 | Status status = SyncDirIfManifest(); |
341 | 424 | if (!status.ok()) { |
342 | 0 | return status; |
343 | 0 | } |
344 | | |
345 | 424 | status = FlushBuffer(); |
346 | 424 | if (!status.ok()) { |
347 | 0 | return status; |
348 | 0 | } |
349 | | |
350 | 424 | return SyncFd(fd_, filename_); |
351 | 424 | } |
352 | | |
353 | | private: |
354 | 5.69k | Status FlushBuffer() { |
355 | 5.69k | Status status = WriteUnbuffered(buf_, pos_); |
356 | 5.69k | pos_ = 0; |
357 | 5.69k | return status; |
358 | 5.69k | } |
359 | | |
360 | 5.69k | Status WriteUnbuffered(const char* data, size_t size) { |
361 | 10.6k | while (size > 0) { |
362 | 4.96k | ssize_t write_result = ::write(fd_, data, size); |
363 | 4.96k | if (write_result < 0) { |
364 | 0 | if (errno == EINTR) { |
365 | 0 | continue; // Retry |
366 | 0 | } |
367 | 0 | return PosixError(filename_, errno); |
368 | 0 | } |
369 | 4.96k | data += write_result; |
370 | 4.96k | size -= write_result; |
371 | 4.96k | } |
372 | 5.69k | return Status::OK(); |
373 | 5.69k | } |
374 | | |
375 | 424 | Status SyncDirIfManifest() { |
376 | 424 | Status status; |
377 | 424 | if (!is_manifest_) { |
378 | 219 | return status; |
379 | 219 | } |
380 | | |
381 | 205 | int fd = ::open(dirname_.c_str(), O_RDONLY | kOpenBaseFlags); |
382 | 205 | if (fd < 0) { |
383 | 0 | status = PosixError(dirname_, errno); |
384 | 205 | } else { |
385 | 205 | status = SyncFd(fd, dirname_); |
386 | 205 | ::close(fd); |
387 | 205 | } |
388 | 205 | return status; |
389 | 424 | } |
390 | | |
391 | | // Ensures that all the caches associated with the given file descriptor's |
392 | | // data are flushed all the way to durable media, and can withstand power |
393 | | // failures. |
394 | | // |
395 | | // The path argument is only used to populate the description string in the |
396 | | // returned Status if an error occurs. |
397 | 629 | static Status SyncFd(int fd, const std::string& fd_path) { |
398 | | #if HAVE_FULLFSYNC |
399 | | // On macOS and iOS, fsync() doesn't guarantee durability past power |
400 | | // failures. fcntl(F_FULLFSYNC) is required for that purpose. Some |
401 | | // filesystems don't support fcntl(F_FULLFSYNC), and require a fallback to |
402 | | // fsync(). |
403 | | if (::fcntl(fd, F_FULLFSYNC) == 0) { |
404 | | return Status::OK(); |
405 | | } |
406 | | #endif // HAVE_FULLFSYNC |
407 | | |
408 | 629 | #if HAVE_FDATASYNC |
409 | 629 | bool sync_success = ::fdatasync(fd) == 0; |
410 | | #else |
411 | | bool sync_success = ::fsync(fd) == 0; |
412 | | #endif // HAVE_FDATASYNC |
413 | | |
414 | 629 | if (sync_success) { |
415 | 629 | return Status::OK(); |
416 | 629 | } |
417 | 0 | return PosixError(fd_path, errno); |
418 | 629 | } |
419 | | |
420 | | // Returns the directory name in a path pointing to a file. |
421 | | // |
422 | | // Returns "." if the path does not contain any directory separator. |
423 | 529 | static std::string Dirname(const std::string& filename) { |
424 | 529 | std::string::size_type separator_pos = filename.rfind('/'); |
425 | 529 | if (separator_pos == std::string::npos) { |
426 | 0 | return std::string("."); |
427 | 0 | } |
428 | | // The filename component should not contain a path separator. If it does, |
429 | | // the splitting was done incorrectly. |
430 | 529 | assert(filename.find('/', separator_pos + 1) == std::string::npos); |
431 | | |
432 | 529 | return filename.substr(0, separator_pos); |
433 | 529 | } |
434 | | |
435 | | // Extracts the file name from a path pointing to a file. |
436 | | // |
437 | | // The returned Slice points to |filename|'s data buffer, so it is only valid |
438 | | // while |filename| is alive and unchanged. |
439 | 529 | static Slice Basename(const std::string& filename) { |
440 | 529 | std::string::size_type separator_pos = filename.rfind('/'); |
441 | 529 | if (separator_pos == std::string::npos) { |
442 | 0 | return Slice(filename); |
443 | 0 | } |
444 | | // The filename component should not contain a path separator. If it does, |
445 | | // the splitting was done incorrectly. |
446 | 529 | assert(filename.find('/', separator_pos + 1) == std::string::npos); |
447 | | |
448 | 529 | return Slice(filename.data() + separator_pos + 1, |
449 | 529 | filename.length() - separator_pos - 1); |
450 | 529 | } |
451 | | |
452 | | // True if the given file is a manifest file. |
453 | 529 | static bool IsManifest(const std::string& filename) { |
454 | 529 | return Basename(filename).starts_with("MANIFEST"); |
455 | 529 | } |
456 | | |
457 | | // buf_[0, pos_ - 1] contains data to be written to fd_. |
458 | | char buf_[kWritableFileBufferSize]; |
459 | | size_t pos_; |
460 | | int fd_; |
461 | | |
462 | | const bool is_manifest_; // True if the file's name starts with MANIFEST. |
463 | | const std::string filename_; |
464 | | const std::string dirname_; // The directory of filename_. |
465 | | }; |
466 | | |
// Applies (|lock| == true) or removes (|lock| == false) a write lock covering
// the whole file referred to by |fd|, using a non-blocking fcntl(F_SETLK)
// request. errno is reset to 0 before the request is issued. Returns the
// result of fcntl(), i.e. -1 on failure.
int LockOrUnlock(int fd, bool lock) {
  errno = 0;

  struct ::flock whole_file_request;
  std::memset(&whole_file_request, 0, sizeof(whole_file_request));
  if (lock) {
    whole_file_request.l_type = F_WRLCK;
  } else {
    whole_file_request.l_type = F_UNLCK;
  }
  // A zero length starting at offset 0 of the file selects the entire file.
  whole_file_request.l_whence = SEEK_SET;
  whole_file_request.l_start = 0;
  whole_file_request.l_len = 0;

  return ::fcntl(fd, F_SETLK, &whole_file_request);
}
477 | | |
478 | | // Instances are thread-safe because they are immutable. |
479 | | class PosixFileLock : public FileLock { |
480 | | public: |
481 | | PosixFileLock(int fd, std::string filename) |
482 | 113 | : fd_(fd), filename_(std::move(filename)) {} |
483 | | |
484 | 226 | int fd() const { return fd_; } |
485 | 113 | const std::string& filename() const { return filename_; } |
486 | | |
487 | | private: |
488 | | const int fd_; |
489 | | const std::string filename_; |
490 | | }; |
491 | | |
492 | | // Tracks the files locked by PosixEnv::LockFile(). |
493 | | // |
494 | | // We maintain a separate set instead of relying on fcntl(F_SETLK) because |
495 | | // fcntl(F_SETLK) does not provide any protection against multiple uses from the |
496 | | // same process. |
497 | | // |
498 | | // Instances are thread-safe because all member data is guarded by a mutex. |
499 | | class PosixLockTable { |
500 | | public: |
501 | 113 | bool Insert(const std::string& fname) LOCKS_EXCLUDED(mu_) { |
502 | 113 | mu_.Lock(); |
503 | 113 | bool succeeded = locked_files_.insert(fname).second; |
504 | 113 | mu_.Unlock(); |
505 | 113 | return succeeded; |
506 | 113 | } |
507 | 113 | void Remove(const std::string& fname) LOCKS_EXCLUDED(mu_) { |
508 | 113 | mu_.Lock(); |
509 | 113 | locked_files_.erase(fname); |
510 | 113 | mu_.Unlock(); |
511 | 113 | } |
512 | | |
513 | | private: |
514 | | port::Mutex mu_; |
515 | | std::set<std::string> locked_files_ GUARDED_BY(mu_); |
516 | | }; |
517 | | |
518 | | class PosixEnv : public Env { |
519 | | public: |
520 | | PosixEnv(); |
521 | 0 | ~PosixEnv() override { |
522 | 0 | static const char msg[] = |
523 | 0 | "PosixEnv singleton destroyed. Unsupported behavior!\n"; |
524 | 0 | std::fwrite(msg, 1, sizeof(msg), stderr); |
525 | 0 | std::abort(); |
526 | 0 | } |
527 | | |
528 | | Status NewSequentialFile(const std::string& filename, |
529 | 276 | SequentialFile** result) override { |
530 | 276 | int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags); |
531 | 276 | if (fd < 0) { |
532 | 0 | *result = nullptr; |
533 | 0 | return PosixError(filename, errno); |
534 | 0 | } |
535 | | |
536 | 276 | *result = new PosixSequentialFile(filename, fd); |
537 | 276 | return Status::OK(); |
538 | 276 | } |
539 | | |
540 | | Status NewRandomAccessFile(const std::string& filename, |
541 | 79 | RandomAccessFile** result) override { |
542 | 79 | *result = nullptr; |
543 | 79 | int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags); |
544 | 79 | if (fd < 0) { |
545 | 0 | return PosixError(filename, errno); |
546 | 0 | } |
547 | | |
548 | 79 | if (!mmap_limiter_.Acquire()) { |
549 | 0 | *result = new PosixRandomAccessFile(filename, fd, &fd_limiter_); |
550 | 0 | return Status::OK(); |
551 | 0 | } |
552 | | |
553 | 79 | uint64_t file_size; |
554 | 79 | Status status = GetFileSize(filename, &file_size); |
555 | 79 | if (status.ok()) { |
556 | 79 | void* mmap_base = |
557 | 79 | ::mmap(/*addr=*/nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0); |
558 | 79 | if (mmap_base != MAP_FAILED) { |
559 | 79 | *result = new PosixMmapReadableFile(filename, |
560 | 79 | reinterpret_cast<char*>(mmap_base), |
561 | 79 | file_size, &mmap_limiter_); |
562 | 79 | } else { |
563 | 0 | status = PosixError(filename, errno); |
564 | 0 | } |
565 | 79 | } |
566 | 79 | ::close(fd); |
567 | 79 | if (!status.ok()) { |
568 | 0 | mmap_limiter_.Release(); |
569 | 0 | } |
570 | 79 | return status; |
571 | 79 | } |
572 | | |
573 | | Status NewWritableFile(const std::string& filename, |
574 | 529 | WritableFile** result) override { |
575 | 529 | int fd = ::open(filename.c_str(), |
576 | 529 | O_TRUNC | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644); |
577 | 529 | if (fd < 0) { |
578 | 0 | *result = nullptr; |
579 | 0 | return PosixError(filename, errno); |
580 | 0 | } |
581 | | |
582 | 529 | *result = new PosixWritableFile(filename, fd); |
583 | 529 | return Status::OK(); |
584 | 529 | } |
585 | | |
586 | | Status NewAppendableFile(const std::string& filename, |
587 | 0 | WritableFile** result) override { |
588 | 0 | int fd = ::open(filename.c_str(), |
589 | 0 | O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644); |
590 | 0 | if (fd < 0) { |
591 | 0 | *result = nullptr; |
592 | 0 | return PosixError(filename, errno); |
593 | 0 | } |
594 | | |
595 | 0 | *result = new PosixWritableFile(filename, fd); |
596 | 0 | return Status::OK(); |
597 | 0 | } |
598 | | |
599 | 113 | bool FileExists(const std::string& filename) override { |
600 | 113 | return ::access(filename.c_str(), F_OK) == 0; |
601 | 113 | } |
602 | | |
603 | | Status GetChildren(const std::string& directory_path, |
604 | 254 | std::vector<std::string>* result) override { |
605 | 254 | result->clear(); |
606 | 254 | ::DIR* dir = ::opendir(directory_path.c_str()); |
607 | 254 | if (dir == nullptr) { |
608 | 0 | return PosixError(directory_path, errno); |
609 | 0 | } |
610 | 254 | struct ::dirent* entry; |
611 | 2.51k | while ((entry = ::readdir(dir)) != nullptr) { |
612 | 2.25k | result->emplace_back(entry->d_name); |
613 | 2.25k | } |
614 | 254 | ::closedir(dir); |
615 | 254 | return Status::OK(); |
616 | 254 | } |
617 | | |
618 | 224 | Status RemoveFile(const std::string& filename) override { |
619 | 224 | if (::unlink(filename.c_str()) != 0) { |
620 | 14 | return PosixError(filename, errno); |
621 | 14 | } |
622 | 210 | return Status::OK(); |
623 | 224 | } |
624 | | |
625 | 226 | Status CreateDir(const std::string& dirname) override { |
626 | 226 | if (::mkdir(dirname.c_str(), 0755) != 0) { |
627 | 162 | return PosixError(dirname, errno); |
628 | 162 | } |
629 | 64 | return Status::OK(); |
630 | 226 | } |
631 | | |
632 | 0 | Status RemoveDir(const std::string& dirname) override { |
633 | 0 | if (::rmdir(dirname.c_str()) != 0) { |
634 | 0 | return PosixError(dirname, errno); |
635 | 0 | } |
636 | 0 | return Status::OK(); |
637 | 0 | } |
638 | | |
639 | 79 | Status GetFileSize(const std::string& filename, uint64_t* size) override { |
640 | 79 | struct ::stat file_stat; |
641 | 79 | if (::stat(filename.c_str(), &file_stat) != 0) { |
642 | 0 | *size = 0; |
643 | 0 | return PosixError(filename, errno); |
644 | 0 | } |
645 | 79 | *size = file_stat.st_size; |
646 | 79 | return Status::OK(); |
647 | 79 | } |
648 | | |
649 | 290 | Status RenameFile(const std::string& from, const std::string& to) override { |
650 | 290 | if (std::rename(from.c_str(), to.c_str()) != 0) { |
651 | 64 | return PosixError(from, errno); |
652 | 64 | } |
653 | 226 | return Status::OK(); |
654 | 290 | } |
655 | | |
656 | 113 | Status LockFile(const std::string& filename, FileLock** lock) override { |
657 | 113 | *lock = nullptr; |
658 | | |
659 | 113 | int fd = ::open(filename.c_str(), O_RDWR | O_CREAT | kOpenBaseFlags, 0644); |
660 | 113 | if (fd < 0) { |
661 | 0 | return PosixError(filename, errno); |
662 | 0 | } |
663 | | |
664 | 113 | if (!locks_.Insert(filename)) { |
665 | 0 | ::close(fd); |
666 | 0 | return Status::IOError("lock " + filename, "already held by process"); |
667 | 0 | } |
668 | | |
669 | 113 | if (LockOrUnlock(fd, true) == -1) { |
670 | 0 | int lock_errno = errno; |
671 | 0 | ::close(fd); |
672 | 0 | locks_.Remove(filename); |
673 | 0 | return PosixError("lock " + filename, lock_errno); |
674 | 0 | } |
675 | | |
676 | 113 | *lock = new PosixFileLock(fd, filename); |
677 | 113 | return Status::OK(); |
678 | 113 | } |
679 | | |
680 | 113 | Status UnlockFile(FileLock* lock) override { |
681 | 113 | PosixFileLock* posix_file_lock = static_cast<PosixFileLock*>(lock); |
682 | 113 | if (LockOrUnlock(posix_file_lock->fd(), false) == -1) { |
683 | 0 | return PosixError("unlock " + posix_file_lock->filename(), errno); |
684 | 0 | } |
685 | 113 | locks_.Remove(posix_file_lock->filename()); |
686 | 113 | ::close(posix_file_lock->fd()); |
687 | 113 | delete posix_file_lock; |
688 | 113 | return Status::OK(); |
689 | 113 | } |
690 | | |
691 | | void Schedule(void (*background_work_function)(void* background_work_arg), |
692 | | void* background_work_arg) override; |
693 | | |
694 | | void StartThread(void (*thread_main)(void* thread_main_arg), |
695 | 0 | void* thread_main_arg) override { |
696 | 0 | std::thread new_thread(thread_main, thread_main_arg); |
697 | 0 | new_thread.detach(); |
698 | 0 | } |
699 | | |
700 | 0 | Status GetTestDirectory(std::string* result) override { |
701 | 0 | const char* env = std::getenv("TEST_TMPDIR"); |
702 | 0 | if (env && env[0] != '\0') { |
703 | 0 | *result = env; |
704 | 0 | } else { |
705 | 0 | char buf[100]; |
706 | 0 | std::snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d", |
707 | 0 | static_cast<int>(::geteuid())); |
708 | 0 | *result = buf; |
709 | 0 | } |
710 | | |
711 | | // The CreateDir status is ignored because the directory may already exist. |
712 | 0 | CreateDir(*result); |
713 | |
|
714 | 0 | return Status::OK(); |
715 | 0 | } |
716 | | |
717 | 113 | Status NewLogger(const std::string& filename, Logger** result) override { |
718 | 113 | int fd = ::open(filename.c_str(), |
719 | 113 | O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644); |
720 | 113 | if (fd < 0) { |
721 | 0 | *result = nullptr; |
722 | 0 | return PosixError(filename, errno); |
723 | 0 | } |
724 | | |
725 | 113 | std::FILE* fp = ::fdopen(fd, "w"); |
726 | 113 | if (fp == nullptr) { |
727 | 0 | ::close(fd); |
728 | 0 | *result = nullptr; |
729 | 0 | return PosixError(filename, errno); |
730 | 113 | } else { |
731 | 113 | *result = new PosixLogger(fp); |
732 | 113 | return Status::OK(); |
733 | 113 | } |
734 | 113 | } |
735 | | |
736 | 112 | uint64_t NowMicros() override { |
737 | 112 | static constexpr uint64_t kUsecondsPerSecond = 1000000; |
738 | 112 | struct ::timeval tv; |
739 | 112 | ::gettimeofday(&tv, nullptr); |
740 | 112 | return static_cast<uint64_t>(tv.tv_sec) * kUsecondsPerSecond + tv.tv_usec; |
741 | 112 | } |
742 | | |
743 | 0 | void SleepForMicroseconds(int micros) override { |
744 | 0 | std::this_thread::sleep_for(std::chrono::microseconds(micros)); |
745 | 0 | } |
746 | | |
747 | | private: |
748 | | void BackgroundThreadMain(); |
749 | | |
750 | 1 | static void BackgroundThreadEntryPoint(PosixEnv* env) { |
751 | 1 | env->BackgroundThreadMain(); |
752 | 1 | } |
753 | | |
754 | | // Stores the work item data in a Schedule() call. |
755 | | // |
756 | | // Instances are constructed on the thread calling Schedule() and used on the |
757 | | // background thread. |
758 | | // |
759 | | // This structure is thread-safe because it is immutable. |
760 | | struct BackgroundWorkItem { |
761 | | explicit BackgroundWorkItem(void (*function)(void* arg), void* arg) |
762 | 48 | : function(function), arg(arg) {} |
763 | | |
764 | | void (*const function)(void*); |
765 | | void* const arg; |
766 | | }; |
767 | | |
768 | | port::Mutex background_work_mutex_; |
769 | | port::CondVar background_work_cv_ GUARDED_BY(background_work_mutex_); |
770 | | bool started_background_thread_ GUARDED_BY(background_work_mutex_); |
771 | | |
772 | | std::queue<BackgroundWorkItem> background_work_queue_ |
773 | | GUARDED_BY(background_work_mutex_); |
774 | | |
775 | | PosixLockTable locks_; // Thread-safe. |
776 | | Limiter mmap_limiter_; // Thread-safe. |
777 | | Limiter fd_limiter_; // Thread-safe. |
778 | | }; |
779 | | |
780 | | // Return the maximum number of concurrent mmaps. |
781 | 1 | int MaxMmaps() { return g_mmap_limit; } |
782 | | |
783 | | // Return the maximum number of read-only files to keep open. |
784 | 1 | int MaxOpenFiles() { |
785 | 1 | if (g_open_read_only_file_limit >= 0) { |
786 | 0 | return g_open_read_only_file_limit; |
787 | 0 | } |
788 | | #ifdef __Fuchsia__ |
789 | | // Fuchsia doesn't implement getrlimit. |
790 | | g_open_read_only_file_limit = 50; |
791 | | #else |
792 | 1 | struct ::rlimit rlim; |
793 | 1 | if (::getrlimit(RLIMIT_NOFILE, &rlim)) { |
794 | | // getrlimit failed, fallback to hard-coded default. |
795 | 0 | g_open_read_only_file_limit = 50; |
796 | 1 | } else if (rlim.rlim_cur == RLIM_INFINITY) { |
797 | 0 | g_open_read_only_file_limit = std::numeric_limits<int>::max(); |
798 | 1 | } else { |
799 | | // Allow use of 20% of available file descriptors for read-only files. |
800 | 1 | g_open_read_only_file_limit = rlim.rlim_cur / 5; |
801 | 1 | } |
802 | 1 | #endif |
803 | 1 | return g_open_read_only_file_limit; |
804 | 1 | } |
805 | | |
806 | | } // namespace |
807 | | |
808 | | PosixEnv::PosixEnv() |
809 | 1 | : background_work_cv_(&background_work_mutex_), |
810 | 1 | started_background_thread_(false), |
811 | 1 | mmap_limiter_(MaxMmaps()), |
812 | 1 | fd_limiter_(MaxOpenFiles()) {} |
813 | | |
814 | | void PosixEnv::Schedule( |
815 | | void (*background_work_function)(void* background_work_arg), |
816 | 48 | void* background_work_arg) { |
817 | 48 | background_work_mutex_.Lock(); |
818 | | |
819 | | // Start the background thread, if we haven't done so already. |
820 | 48 | if (!started_background_thread_) { |
821 | 1 | started_background_thread_ = true; |
822 | 1 | std::thread background_thread(PosixEnv::BackgroundThreadEntryPoint, this); |
823 | 1 | background_thread.detach(); |
824 | 1 | } |
825 | | |
826 | | // If the queue is empty, the background thread may be waiting for work. |
827 | 48 | if (background_work_queue_.empty()) { |
828 | 48 | background_work_cv_.Signal(); |
829 | 48 | } |
830 | | |
831 | 48 | background_work_queue_.emplace(background_work_function, background_work_arg); |
832 | 48 | background_work_mutex_.Unlock(); |
833 | 48 | } |
834 | | |
835 | 1 | void PosixEnv::BackgroundThreadMain() { |
836 | 50 | while (true) { |
837 | 49 | background_work_mutex_.Lock(); |
838 | | |
839 | | // Wait until there is work to be done. |
840 | 93 | while (background_work_queue_.empty()) { |
841 | 44 | background_work_cv_.Wait(); |
842 | 44 | } |
843 | | |
844 | 49 | assert(!background_work_queue_.empty()); |
845 | 49 | auto background_work_function = background_work_queue_.front().function; |
846 | 49 | void* background_work_arg = background_work_queue_.front().arg; |
847 | 49 | background_work_queue_.pop(); |
848 | | |
849 | 49 | background_work_mutex_.Unlock(); |
850 | 49 | background_work_function(background_work_arg); |
851 | 49 | } |
852 | 1 | } |
853 | | |
854 | | namespace { |
855 | | |
856 | | // Wraps an Env instance whose destructor is never created. |
857 | | // |
858 | | // Intended usage: |
859 | | // using PlatformSingletonEnv = SingletonEnv<PlatformEnv>; |
860 | | // void ConfigurePosixEnv(int param) { |
861 | | // PlatformSingletonEnv::AssertEnvNotInitialized(); |
862 | | // // set global configuration flags. |
863 | | // } |
864 | | // Env* Env::Default() { |
865 | | // static PlatformSingletonEnv default_env; |
866 | | // return default_env.env(); |
867 | | // } |
868 | | template <typename EnvType> |
869 | | class SingletonEnv { |
870 | | public: |
871 | 1 | SingletonEnv() { |
872 | | #if !defined(NDEBUG) |
873 | | env_initialized_.store(true, std::memory_order_relaxed); |
874 | | #endif // !defined(NDEBUG) |
875 | 1 | static_assert(sizeof(env_storage_) >= sizeof(EnvType), |
876 | 1 | "env_storage_ will not fit the Env"); |
877 | 1 | static_assert(std::is_standard_layout_v<SingletonEnv<EnvType>>); |
878 | 1 | static_assert( |
879 | 1 | offsetof(SingletonEnv<EnvType>, env_storage_) % alignof(EnvType) == 0, |
880 | 1 | "env_storage_ does not meet the Env's alignment needs"); |
881 | 1 | static_assert(alignof(SingletonEnv<EnvType>) % alignof(EnvType) == 0, |
882 | 1 | "env_storage_ does not meet the Env's alignment needs"); |
883 | 1 | new (env_storage_) EnvType(); |
884 | 1 | } |
885 | | ~SingletonEnv() = default; |
886 | | |
887 | | SingletonEnv(const SingletonEnv&) = delete; |
888 | | SingletonEnv& operator=(const SingletonEnv&) = delete; |
889 | | |
890 | 192 | Env* env() { return reinterpret_cast<Env*>(&env_storage_); } |
891 | | |
892 | 0 | static void AssertEnvNotInitialized() { |
893 | | #if !defined(NDEBUG) |
894 | | assert(!env_initialized_.load(std::memory_order_relaxed)); |
895 | | #endif // !defined(NDEBUG) |
896 | 0 | } |
897 | | |
898 | | private: |
899 | | alignas(EnvType) char env_storage_[sizeof(EnvType)]; |
900 | | #if !defined(NDEBUG) |
901 | | static std::atomic<bool> env_initialized_; |
902 | | #endif // !defined(NDEBUG) |
903 | | }; |
904 | | |
905 | | #if !defined(NDEBUG) |
906 | | template <typename EnvType> |
907 | | std::atomic<bool> SingletonEnv<EnvType>::env_initialized_; |
908 | | #endif // !defined(NDEBUG) |
909 | | |
910 | | using PosixDefaultEnv = SingletonEnv<PosixEnv>; |
911 | | |
912 | | } // namespace |
913 | | |
914 | 0 | void EnvPosixTestHelper::SetReadOnlyFDLimit(int limit) { |
915 | 0 | PosixDefaultEnv::AssertEnvNotInitialized(); |
916 | 0 | g_open_read_only_file_limit = limit; |
917 | 0 | } |
918 | | |
919 | 0 | void EnvPosixTestHelper::SetReadOnlyMMapLimit(int limit) { |
920 | 0 | PosixDefaultEnv::AssertEnvNotInitialized(); |
921 | 0 | g_mmap_limit = limit; |
922 | 0 | } |
923 | | |
924 | 192 | Env* Env::Default() { |
925 | 192 | static PosixDefaultEnv env_container; |
926 | 192 | return env_container.env(); |
927 | 192 | } |
928 | | |
929 | | } // namespace leveldb |