Line data Source code
1 : // Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package vfs // import "github.com/cockroachdb/pebble/vfs"
6 :
7 : import (
8 : "bytes"
9 : "fmt"
10 : "io"
11 : "math/rand/v2"
12 : "os"
13 : "path"
14 : "slices"
15 : "sort"
16 : "strings"
17 : "sync"
18 : "sync/atomic"
19 : "syscall"
20 : "time"
21 :
22 : "github.com/cockroachdb/errors"
23 : "github.com/cockroachdb/errors/oserror"
24 : "github.com/cockroachdb/pebble/internal/invariants"
25 : )
26 :
// sep is the path separator understood by MemFS. MemFS paths are always
// slash-separated, which is why the Path* helpers use package path rather
// than path/filepath.
const sep = "/"
28 :
29 : // NewMem returns a new memory-backed FS implementation.
30 1 : func NewMem() *MemFS {
31 1 : return &MemFS{
32 1 : root: newRootMemNode(),
33 1 : }
34 1 : }
35 :
36 : // NewCrashableMem returns a memory-backed FS implementation that supports the
37 : // CrashClone() method. This method can be used to obtain a copy of the FS after
38 : // a simulated crash, where only data that was last synced is guaranteed to be
39 : // there (with no guarantees one way or the other about more recently written
40 : // data).
41 : //
42 : // Note: when CrashClone() is not necessary, NewMem() is faster and should be
43 : // preferred.
44 : //
45 : // Expected usage:
46 : //
47 : // fs := NewCrashableMem()
48 : // db := Open(..., &Options{FS: fs})
49 : // // Do and commit various operations.
50 : // ...
51 : // // Make a clone of the FS after a simulated crash.
52 : // crashedFS := fs.CrashClone(CrashCloneCfg{Probability: 50, RNG: rand.New(rand.NewSource(0))})
53 : //
54 : // // This will finish any ongoing background flushes, compactions but none of these writes will
55 : // // affect crashedFS.
56 : // db.Close()
57 : //
58 : // // Open the DB against the crash clone.
59 : // db := Open(..., &Options{FS: crashedFS})
60 1 : func NewCrashableMem() *MemFS {
61 1 : return &MemFS{
62 1 : root: newRootMemNode(),
63 1 : crashable: true,
64 1 : }
65 1 : }
66 :
67 : // NewMemFile returns a memory-backed File implementation. The memory-backed
68 : // file takes ownership of data.
69 0 : func NewMemFile(data []byte) File {
70 0 : n := &memNode{}
71 0 : n.refs.Store(1)
72 0 : n.mu.data = data
73 0 : n.mu.modTime = time.Now()
74 0 : return &memFile{
75 0 : n: n,
76 0 : read: true,
77 0 : }
78 0 : }
79 :
80 : // MemFS implements FS.
81 : type MemFS struct {
82 : mu sync.Mutex
83 : root *memNode
84 :
85 : // cloneMu is used to block all modification operations while we clone the
86 : // filesystem. Only used when crashable is true.
87 : cloneMu sync.RWMutex
88 :
89 : // lockFiles holds a map of open file locks. Presence in this map indicates
90 : // a file lock is currently held. Keys are strings holding the path of the
91 : // locked file. The stored value is untyped and unused; only presence of
92 : // the key within the map is significant.
93 : lockedFiles sync.Map
94 : crashable bool
95 : // Windows has peculiar semantics with respect to hard links and deleting
96 : // open files. In tests meant to exercise this behavior, this flag can be
97 : // set to error if removing an open file.
98 : windowsSemantics bool
99 : }
100 :
101 : var _ FS = &MemFS{}
102 :
103 : // UseWindowsSemantics configures whether the MemFS implements Windows-style
104 : // semantics, in particular with respect to whether any of an open file's links
105 : // may be removed. Windows semantics default to off.
106 0 : func (y *MemFS) UseWindowsSemantics(windowsSemantics bool) {
107 0 : y.mu.Lock()
108 0 : defer y.mu.Unlock()
109 0 : y.windowsSemantics = windowsSemantics
110 0 : }
111 :
112 : // String dumps the contents of the MemFS.
113 0 : func (y *MemFS) String() string {
114 0 : y.mu.Lock()
115 0 : defer y.mu.Unlock()
116 0 :
117 0 : s := new(bytes.Buffer)
118 0 : y.root.dump(s, 0, sep)
119 0 : return s.String()
120 0 : }
121 :
122 : // CrashCloneCfg configures a CrashClone call. The zero value corresponds to the
123 : // crash clone containing exactly the data that was last synced.
124 : type CrashCloneCfg struct {
125 : // UnsyncedDataPercent is the probability that a data block or directory entry
126 : // that was not synced will be part of the clone. If 0, the clone will contain
127 : // exactly the data that was last synced. If 100, the clone will be identical
128 : // to the current filesystem.
129 : UnsyncedDataPercent int
130 : // RNG must be set if UnsyncedDataPercent > 0.
131 : RNG *rand.Rand
132 : }
133 :
134 : // CrashClone creates a new filesystem that reflects a possible state of this
135 : // filesystem after a crash at this moment. The new filesystem will contain all
136 : // data that was synced, and some fraction of the data that was not synced. The
137 : // latter is controlled by CrashCloneCfg.
138 1 : func (y *MemFS) CrashClone(cfg CrashCloneCfg) *MemFS {
139 1 : if !y.crashable {
140 0 : panic("not a crashable MemFS")
141 : }
142 : // Block all modification operations while we clone.
143 1 : y.cloneMu.Lock()
144 1 : defer y.cloneMu.Unlock()
145 1 : newFS := &MemFS{crashable: true}
146 1 : newFS.windowsSemantics = y.windowsSemantics
147 1 : newFS.root = y.root.CrashClone(&cfg)
148 1 : return newFS
149 : }
150 :
151 : // walk walks the directory tree for the fullname, calling f at each step. If
152 : // f returns an error, the walk will be aborted and return that same error.
153 : //
154 : // Each walk is atomic: y's mutex is held for the entire operation, including
155 : // all calls to f.
156 : //
157 : // dir is the directory at that step, frag is the name fragment, and final is
158 : // whether it is the final step. For example, walking "/foo/bar/x" will result
159 : // in 3 calls to f:
160 : // - "/", "foo", false
161 : // - "/foo/", "bar", false
162 : // - "/foo/bar/", "x", true
163 : //
164 : // Similarly, walking "/y/z/", with a trailing slash, will result in 3 calls to f:
165 : // - "/", "y", false
166 : // - "/y/", "z", false
167 : // - "/y/z/", "", true
168 1 : func (y *MemFS) walk(fullname string, f func(dir *memNode, frag string, final bool) error) error {
169 1 : y.mu.Lock()
170 1 : defer y.mu.Unlock()
171 1 :
172 1 : // For memfs, the current working directory is the same as the root directory,
173 1 : // so we strip off any leading "/"s to make fullname a relative path, and
174 1 : // the walk starts at y.root.
175 1 : for len(fullname) > 0 && fullname[0] == sep[0] {
176 1 : fullname = fullname[1:]
177 1 : }
178 1 : if fullname == "." {
179 1 : fullname = ""
180 1 : }
181 1 : dir := y.root
182 1 :
183 1 : for {
184 1 : frag, remaining := fullname, ""
185 1 : i := strings.IndexRune(fullname, rune(sep[0]))
186 1 : final := i < 0
187 1 : if !final {
188 1 : frag, remaining = fullname[:i], fullname[i+1:]
189 1 : for len(remaining) > 0 && remaining[0] == sep[0] {
190 0 : remaining = remaining[1:]
191 0 : }
192 : }
193 1 : if err := f(dir, frag, final); err != nil {
194 1 : return err
195 1 : }
196 1 : if final {
197 1 : break
198 : }
199 1 : child := dir.children[frag]
200 1 : if child == nil {
201 1 : return &os.PathError{
202 1 : Op: "open",
203 1 : Path: fullname,
204 1 : Err: oserror.ErrNotExist,
205 1 : }
206 1 : }
207 1 : if !child.isDir {
208 0 : return &os.PathError{
209 0 : Op: "open",
210 0 : Path: fullname,
211 0 : Err: errors.New("not a directory"),
212 0 : }
213 0 : }
214 1 : dir, fullname = child, remaining
215 : }
216 1 : return nil
217 : }
218 :
219 : // Create implements FS.Create.
220 1 : func (y *MemFS) Create(fullname string, category DiskWriteCategory) (File, error) {
221 1 : if y.crashable {
222 1 : y.cloneMu.RLock()
223 1 : defer y.cloneMu.RUnlock()
224 1 : }
225 1 : var ret *memFile
226 1 : err := y.walk(fullname, func(dir *memNode, frag string, final bool) error {
227 1 : if final {
228 1 : if frag == "" {
229 0 : return errors.New("pebble/vfs: empty file name")
230 0 : }
231 1 : n := &memNode{}
232 1 : dir.children[frag] = n
233 1 : ret = &memFile{
234 1 : name: frag,
235 1 : n: n,
236 1 : fs: y,
237 1 : read: true,
238 1 : write: true,
239 1 : }
240 : }
241 1 : return nil
242 : })
243 1 : if err != nil {
244 0 : return nil, err
245 0 : }
246 1 : ret.n.refs.Add(1)
247 1 : return ret, nil
248 : }
249 :
250 : // Link implements FS.Link.
251 1 : func (y *MemFS) Link(oldname, newname string) error {
252 1 : if y.crashable {
253 1 : y.cloneMu.RLock()
254 1 : defer y.cloneMu.RUnlock()
255 1 : }
256 1 : var n *memNode
257 1 : err := y.walk(oldname, func(dir *memNode, frag string, final bool) error {
258 1 : if final {
259 1 : if frag == "" {
260 0 : return errors.New("pebble/vfs: empty file name")
261 0 : }
262 1 : n = dir.children[frag]
263 : }
264 1 : return nil
265 : })
266 1 : if err != nil {
267 0 : return err
268 0 : }
269 1 : if n == nil {
270 0 : return &os.LinkError{
271 0 : Op: "link",
272 0 : Old: oldname,
273 0 : New: newname,
274 0 : Err: oserror.ErrNotExist,
275 0 : }
276 0 : }
277 1 : return y.walk(newname, func(dir *memNode, frag string, final bool) error {
278 1 : if final {
279 1 : if frag == "" {
280 0 : return errors.New("pebble/vfs: empty file name")
281 0 : }
282 1 : y.cloneMu.RLock()
283 1 : defer y.cloneMu.RUnlock()
284 1 : if _, ok := dir.children[frag]; ok {
285 0 : return &os.LinkError{
286 0 : Op: "link",
287 0 : Old: oldname,
288 0 : New: newname,
289 0 : Err: oserror.ErrExist,
290 0 : }
291 0 : }
292 1 : dir.children[frag] = n
293 : }
294 1 : return nil
295 : })
296 : }
297 :
298 1 : func (y *MemFS) open(fullname string, openForWrite bool) (File, error) {
299 1 : var ret *memFile
300 1 : err := y.walk(fullname, func(dir *memNode, frag string, final bool) error {
301 1 : if final {
302 1 : if frag == "" {
303 1 : ret = &memFile{
304 1 : name: sep, // this is the root directory
305 1 : n: dir,
306 1 : fs: y,
307 1 : }
308 1 : return nil
309 1 : }
310 1 : if n := dir.children[frag]; n != nil {
311 1 : ret = &memFile{
312 1 : name: frag,
313 1 : n: n,
314 1 : fs: y,
315 1 : read: true,
316 1 : write: openForWrite,
317 1 : }
318 1 : }
319 : }
320 1 : return nil
321 : })
322 1 : if err != nil {
323 1 : return nil, err
324 1 : }
325 1 : if ret == nil {
326 1 : return nil, &os.PathError{
327 1 : Op: "open",
328 1 : Path: fullname,
329 1 : Err: oserror.ErrNotExist,
330 1 : }
331 1 : }
332 1 : ret.n.refs.Add(1)
333 1 : return ret, nil
334 : }
335 :
336 : // Open implements FS.Open.
337 1 : func (y *MemFS) Open(fullname string, opts ...OpenOption) (File, error) {
338 1 : return y.open(fullname, false /* openForWrite */)
339 1 : }
340 :
341 : // OpenReadWrite implements FS.OpenReadWrite.
342 : func (y *MemFS) OpenReadWrite(
343 : fullname string, category DiskWriteCategory, opts ...OpenOption,
344 1 : ) (File, error) {
345 1 : f, err := y.open(fullname, true /* openForWrite */)
346 1 : pathErr, ok := err.(*os.PathError)
347 1 : if ok && pathErr.Err == oserror.ErrNotExist {
348 1 : return y.Create(fullname, category)
349 1 : }
350 0 : return f, err
351 : }
352 :
353 : // OpenDir implements FS.OpenDir.
354 1 : func (y *MemFS) OpenDir(fullname string) (File, error) {
355 1 : return y.open(fullname, false /* openForWrite */)
356 1 : }
357 :
358 : // Remove implements FS.Remove.
359 1 : func (y *MemFS) Remove(fullname string) error {
360 1 : if y.crashable {
361 1 : y.cloneMu.RLock()
362 1 : defer y.cloneMu.RUnlock()
363 1 : }
364 1 : return y.walk(fullname, func(dir *memNode, frag string, final bool) error {
365 1 : if final {
366 1 : if frag == "" {
367 0 : return errors.New("pebble/vfs: empty file name")
368 0 : }
369 1 : child, ok := dir.children[frag]
370 1 : if !ok {
371 1 : return oserror.ErrNotExist
372 1 : }
373 1 : if y.windowsSemantics {
374 0 : // Disallow removal of open files/directories which implements
375 0 : // Windows semantics. This ensures that we don't regress in the
376 0 : // ordering of operations and try to remove a file while it is
377 0 : // still open.
378 0 : if n := child.refs.Load(); n > 0 {
379 0 : return oserror.ErrInvalid
380 0 : }
381 : }
382 1 : if len(child.children) > 0 {
383 0 : return errNotEmpty
384 0 : }
385 1 : delete(dir.children, frag)
386 : }
387 1 : return nil
388 : })
389 : }
390 :
391 : // RemoveAll implements FS.RemoveAll.
392 0 : func (y *MemFS) RemoveAll(fullname string) error {
393 0 : if y.crashable {
394 0 : y.cloneMu.RLock()
395 0 : defer y.cloneMu.RUnlock()
396 0 : }
397 0 : err := y.walk(fullname, func(dir *memNode, frag string, final bool) error {
398 0 : if final {
399 0 : if frag == "" {
400 0 : return errors.New("pebble/vfs: empty file name")
401 0 : }
402 0 : _, ok := dir.children[frag]
403 0 : if !ok {
404 0 : return nil
405 0 : }
406 0 : delete(dir.children, frag)
407 : }
408 0 : return nil
409 : })
410 : // Match os.RemoveAll which returns a nil error even if the parent
411 : // directories don't exist.
412 0 : if oserror.IsNotExist(err) {
413 0 : err = nil
414 0 : }
415 0 : return err
416 : }
417 :
418 : // Rename implements FS.Rename.
419 1 : func (y *MemFS) Rename(oldname, newname string) error {
420 1 : if y.crashable {
421 1 : y.cloneMu.RLock()
422 1 : defer y.cloneMu.RUnlock()
423 1 : }
424 1 : var n *memNode
425 1 : err := y.walk(oldname, func(dir *memNode, frag string, final bool) error {
426 1 : if final {
427 1 : if frag == "" {
428 0 : return errors.New("pebble/vfs: empty file name")
429 0 : }
430 1 : n = dir.children[frag]
431 1 : delete(dir.children, frag)
432 : }
433 1 : return nil
434 : })
435 1 : if err != nil {
436 0 : return err
437 0 : }
438 1 : if n == nil {
439 1 : return &os.PathError{
440 1 : Op: "open",
441 1 : Path: oldname,
442 1 : Err: oserror.ErrNotExist,
443 1 : }
444 1 : }
445 1 : return y.walk(newname, func(dir *memNode, frag string, final bool) error {
446 1 : if final {
447 1 : if frag == "" {
448 0 : return errors.New("pebble/vfs: empty file name")
449 0 : }
450 1 : dir.children[frag] = n
451 : }
452 1 : return nil
453 : })
454 : }
455 :
456 : // ReuseForWrite implements FS.ReuseForWrite.
457 0 : func (y *MemFS) ReuseForWrite(oldname, newname string, category DiskWriteCategory) (File, error) {
458 0 : if y.crashable {
459 0 : y.cloneMu.RLock()
460 0 : defer y.cloneMu.RUnlock()
461 0 : }
462 0 : if err := y.Rename(oldname, newname); err != nil {
463 0 : return nil, err
464 0 : }
465 0 : f, err := y.Open(newname)
466 0 : if err != nil {
467 0 : return nil, err
468 0 : }
469 0 : y.mu.Lock()
470 0 : defer y.mu.Unlock()
471 0 :
472 0 : mf := f.(*memFile)
473 0 : mf.read = false
474 0 : mf.write = true
475 0 : return f, nil
476 : }
477 :
478 : // MkdirAll implements FS.MkdirAll.
479 1 : func (y *MemFS) MkdirAll(dirname string, perm os.FileMode) error {
480 1 : if y.crashable {
481 1 : y.cloneMu.RLock()
482 1 : defer y.cloneMu.RUnlock()
483 1 : }
484 1 : return y.walk(dirname, func(dir *memNode, frag string, final bool) error {
485 1 : if frag == "" {
486 0 : if final {
487 0 : return nil
488 0 : }
489 0 : return errors.New("pebble/vfs: empty file name")
490 : }
491 1 : child := dir.children[frag]
492 1 : if child == nil {
493 1 : dir.children[frag] = &memNode{
494 1 : children: make(map[string]*memNode),
495 1 : isDir: true,
496 1 : }
497 1 : return nil
498 1 : }
499 1 : if !child.isDir {
500 0 : return &os.PathError{
501 0 : Op: "open",
502 0 : Path: dirname,
503 0 : Err: errors.New("not a directory"),
504 0 : }
505 0 : }
506 1 : return nil
507 : })
508 : }
509 :
510 : // Lock implements FS.Lock.
511 1 : func (y *MemFS) Lock(fullname string) (io.Closer, error) {
512 1 : if y.crashable {
513 1 : y.cloneMu.RLock()
514 1 : defer y.cloneMu.RUnlock()
515 1 : }
516 : // FS.Lock excludes other processes, but other processes cannot see this
517 : // process' memory. However some uses (eg, Cockroach tests) may open and
518 : // close the same MemFS-backed database multiple times. We want mutual
519 : // exclusion in this case too. See cockroachdb/cockroach#110645.
520 1 : _, loaded := y.lockedFiles.Swap(fullname, nil /* the value itself is insignificant */)
521 1 : if loaded {
522 0 : // This file lock has already been acquired. On unix, this results in
523 0 : // either EACCES or EAGAIN so we mimic.
524 0 : return nil, syscall.EAGAIN
525 0 : }
526 : // Otherwise, we successfully acquired the lock. Locks are visible in the
527 : // parent directory listing, and they also must be created under an existent
528 : // directory. Create the path so that we have the normal detection of
529 : // non-existent directory paths, and make the lock visible when listing
530 : // directory entries.
531 1 : f, err := y.Create(fullname, WriteCategoryUnspecified)
532 1 : if err != nil {
533 0 : // "Release" the lock since we failed.
534 0 : y.lockedFiles.Delete(fullname)
535 0 : return nil, err
536 0 : }
537 1 : return &memFileLock{
538 1 : y: y,
539 1 : f: f,
540 1 : fullname: fullname,
541 1 : }, nil
542 : }
543 :
544 : // List implements FS.List.
545 1 : func (y *MemFS) List(dirname string) ([]string, error) {
546 1 : if !strings.HasSuffix(dirname, sep) {
547 1 : dirname += sep
548 1 : }
549 1 : var ret []string
550 1 : err := y.walk(dirname, func(dir *memNode, frag string, final bool) error {
551 1 : if final {
552 1 : if frag != "" {
553 0 : panic("unreachable")
554 : }
555 1 : ret = make([]string, 0, len(dir.children))
556 1 : for s := range dir.children {
557 1 : ret = append(ret, s)
558 1 : }
559 : }
560 1 : return nil
561 : })
562 1 : return ret, err
563 : }
564 :
565 : // Stat implements FS.Stat.
566 1 : func (y *MemFS) Stat(name string) (FileInfo, error) {
567 1 : f, err := y.Open(name)
568 1 : if err != nil {
569 1 : if pe, ok := err.(*os.PathError); ok {
570 1 : pe.Op = "stat"
571 1 : }
572 1 : return nil, err
573 : }
574 1 : defer f.Close()
575 1 : return f.Stat()
576 : }
577 :
578 : // PathBase implements FS.PathBase.
579 1 : func (*MemFS) PathBase(p string) string {
580 1 : // Note that MemFS uses forward slashes for its separator, hence the use of
581 1 : // path.Base, not filepath.Base.
582 1 : return path.Base(p)
583 1 : }
584 :
585 : // PathJoin implements FS.PathJoin.
586 1 : func (*MemFS) PathJoin(elem ...string) string {
587 1 : // Note that MemFS uses forward slashes for its separator, hence the use of
588 1 : // path.Join, not filepath.Join.
589 1 : return path.Join(elem...)
590 1 : }
591 :
592 : // PathDir implements FS.PathDir.
593 1 : func (*MemFS) PathDir(p string) string {
594 1 : // Note that MemFS uses forward slashes for its separator, hence the use of
595 1 : // path.Dir, not filepath.Dir.
596 1 : return path.Dir(p)
597 1 : }
598 :
599 : // GetDiskUsage implements FS.GetDiskUsage.
600 1 : func (*MemFS) GetDiskUsage(string) (DiskUsage, error) {
601 1 : return DiskUsage{}, ErrUnsupported
602 1 : }
603 :
604 : // Unwrap implements FS.Unwrap.
605 1 : func (*MemFS) Unwrap() FS { return nil }
606 :
// memNode is a single node of the in-memory tree: it holds either a file's
// data or a directory's children.
type memNode struct {
	// isDir distinguishes directory nodes from file nodes.
	isDir bool
	// refs counts the open handles referring to this node.
	refs atomic.Int32

	// Mutable state.
	//   - For a file: data, syncedData, modTime. A file is only being mutated
	//     by a single goroutine, but there can be concurrent readers, e.g.
	//     DB.Checkpoint() which can read WAL or MANIFEST files that are being
	//     written to. Additionally Sync() calls can be concurrent with writing.
	//   - For a directory: children and syncedChildren. Concurrent writes are
	//     possible, and these are protected using MemFS.mu.
	mu struct {
		sync.Mutex
		data       []byte
		syncedData []byte
		modTime    time.Time
	}

	children       map[string]*memNode
	syncedChildren map[string]*memNode
}
628 :
629 1 : func newRootMemNode() *memNode {
630 1 : return &memNode{
631 1 : children: make(map[string]*memNode),
632 1 : isDir: true,
633 1 : }
634 1 : }
635 :
636 1 : func cloneChildren(f map[string]*memNode) map[string]*memNode {
637 1 : m := make(map[string]*memNode)
638 1 : for k, v := range f {
639 1 : m[k] = v
640 1 : }
641 1 : return m
642 : }
643 :
// dump writes a textual listing of the subtree rooted at f to w. Each node
// occupies one line: a leading column (the data length for files, padding for
// directories), indentation proportional to level, then name. Directories
// other than the root get a trailing separator, and their children are listed
// recursively in sorted name order.
//
// NOTE(review): the whitespace-only string literals below are reproduced as
// they appear in this listing; confirm their exact widths against the
// original source before relying on column alignment.
func (f *memNode) dump(w *bytes.Buffer, level int, name string) {
	if f.isDir {
		w.WriteString(" ")
	} else {
		// File sizes are read under the node mutex since writers may be active.
		f.mu.Lock()
		fmt.Fprintf(w, "%8d ", len(f.mu.data))
		f.mu.Unlock()
	}
	for i := 0; i < level; i++ {
		w.WriteString(" ")
	}
	w.WriteString(name)
	if !f.isDir {
		w.WriteByte('\n')
		return
	}
	if level > 0 { // deal with the fact that the root's name is already "/"
		w.WriteByte(sep[0])
	}
	w.WriteByte('\n')
	// Sort the child names so that the output is deterministic.
	names := make([]string, 0, len(f.children))
	for name := range f.children {
		names = append(names, name)
	}
	sort.Strings(names)
	for _, name := range names {
		f.children[name].dump(w, level+1, name)
	}
}
673 :
674 : // CrashClone creates a crash-consistent clone of the subtree rooted at f, and
675 : // returns the new subtree. cloneMu must be held (in write mode).
676 1 : func (f *memNode) CrashClone(cfg *CrashCloneCfg) *memNode {
677 1 : newNode := &memNode{isDir: f.isDir}
678 1 : if f.isDir {
679 1 : newNode.children = cloneChildren(f.syncedChildren)
680 1 : // Randomly include some non-synced children.
681 1 : for name, child := range f.children {
682 1 : if cfg.UnsyncedDataPercent > 0 && cfg.RNG.IntN(100) < cfg.UnsyncedDataPercent {
683 0 : newNode.children[name] = child
684 0 : }
685 : }
686 1 : for name, child := range newNode.children {
687 1 : newNode.children[name] = child.CrashClone(cfg)
688 1 : }
689 1 : newNode.syncedChildren = cloneChildren(newNode.children)
690 1 : } else {
691 1 : newNode.mu.data = slices.Clone(f.mu.syncedData)
692 1 : newNode.mu.modTime = f.mu.modTime
693 1 : // Randomly include some non-synced blocks.
694 1 : const blockSize = 4096
695 1 : for i := 0; i < len(f.mu.data); i += blockSize {
696 1 : if cfg.UnsyncedDataPercent > 0 && cfg.RNG.IntN(100) < cfg.UnsyncedDataPercent {
697 0 : block := f.mu.data[i:min(i+blockSize, len(f.mu.data))]
698 0 : if grow := i + len(block) - len(newNode.mu.data); grow > 0 {
699 0 : // Grow the file, leaving 0s for any unsynced blocks past the synced
700 0 : // length.
701 0 : newNode.mu.data = append(newNode.mu.data, make([]byte, grow)...)
702 0 : }
703 0 : copy(newNode.mu.data[i:], block)
704 : }
705 : }
706 1 : newNode.mu.syncedData = slices.Clone(newNode.mu.data)
707 : }
708 1 : return newNode
709 : }
710 :
711 : // memFile is a reader or writer of a node's data. Implements File.
712 : type memFile struct {
713 : name string
714 : n *memNode
715 : fs *MemFS // nil for a standalone memFile
716 : pos int
717 : read, write bool
718 : }
719 :
720 : var _ File = (*memFile)(nil)
721 :
722 1 : func (f *memFile) Close() error {
723 1 : if n := f.n.refs.Add(-1); n < 0 {
724 0 : panic(fmt.Sprintf("pebble: close of unopened file: %d", n))
725 : }
726 : // Set node pointer to nil, to cause panic on any subsequent method call. This
727 : // is a defence-in-depth to catch use-after-close or double-close bugs.
728 1 : f.n = nil
729 1 : return nil
730 : }
731 :
732 1 : func (f *memFile) Read(p []byte) (int, error) {
733 1 : if !f.read {
734 0 : return 0, errors.New("pebble/vfs: file was not opened for reading")
735 0 : }
736 1 : if f.n.isDir {
737 0 : return 0, errors.New("pebble/vfs: cannot read a directory")
738 0 : }
739 1 : f.n.mu.Lock()
740 1 : defer f.n.mu.Unlock()
741 1 : if f.pos >= len(f.n.mu.data) {
742 1 : return 0, io.EOF
743 1 : }
744 1 : n := copy(p, f.n.mu.data[f.pos:])
745 1 : f.pos += n
746 1 : return n, nil
747 : }
748 :
749 1 : func (f *memFile) ReadAt(p []byte, off int64) (int, error) {
750 1 : if !f.read {
751 0 : return 0, errors.New("pebble/vfs: file was not opened for reading")
752 0 : }
753 1 : if f.n.isDir {
754 0 : return 0, errors.New("pebble/vfs: cannot read a directory")
755 0 : }
756 1 : f.n.mu.Lock()
757 1 : defer f.n.mu.Unlock()
758 1 : if off >= int64(len(f.n.mu.data)) {
759 0 : return 0, io.EOF
760 0 : }
761 1 : n := copy(p, f.n.mu.data[off:])
762 1 : if n < len(p) {
763 0 : return n, io.EOF
764 0 : }
765 1 : return n, nil
766 : }
767 :
768 1 : func (f *memFile) Write(p []byte) (int, error) {
769 1 : if f.fs.crashable {
770 1 : f.fs.cloneMu.RLock()
771 1 : defer f.fs.cloneMu.RUnlock()
772 1 : }
773 1 : if !f.write {
774 0 : return 0, errors.New("pebble/vfs: file was not created for writing")
775 0 : }
776 1 : if f.n.isDir {
777 0 : return 0, errors.New("pebble/vfs: cannot write a directory")
778 0 : }
779 1 : f.n.mu.Lock()
780 1 : defer f.n.mu.Unlock()
781 1 : f.n.mu.modTime = time.Now()
782 1 : if f.pos+len(p) <= len(f.n.mu.data) {
783 1 : n := copy(f.n.mu.data[f.pos:f.pos+len(p)], p)
784 1 : if n != len(p) {
785 0 : panic("stuff")
786 : }
787 1 : } else {
788 1 : if grow := f.pos - len(f.n.mu.data); grow > 0 {
789 0 : f.n.mu.data = append(f.n.mu.data, make([]byte, grow)...)
790 0 : }
791 1 : f.n.mu.data = append(f.n.mu.data[:f.pos], p...)
792 : }
793 1 : f.pos += len(p)
794 1 :
795 1 : if invariants.Enabled {
796 1 : // Mutate the input buffer to flush out bugs in Pebble which expect the
797 1 : // input buffer to be unmodified.
798 1 : for i := range p {
799 1 : p[i] ^= 0xff
800 1 : }
801 : }
802 1 : return len(p), nil
803 : }
804 :
805 1 : func (f *memFile) WriteAt(p []byte, ofs int64) (int, error) {
806 1 : if f.fs.crashable {
807 1 : f.fs.cloneMu.RLock()
808 1 : defer f.fs.cloneMu.RUnlock()
809 1 : }
810 1 : if !f.write {
811 0 : return 0, errors.New("pebble/vfs: file was not created for writing")
812 0 : }
813 1 : if f.n.isDir {
814 0 : return 0, errors.New("pebble/vfs: cannot write a directory")
815 0 : }
816 1 : f.n.mu.Lock()
817 1 : defer f.n.mu.Unlock()
818 1 : f.n.mu.modTime = time.Now()
819 1 :
820 1 : for len(f.n.mu.data) < int(ofs)+len(p) {
821 1 : f.n.mu.data = append(f.n.mu.data, 0)
822 1 : }
823 :
824 1 : n := copy(f.n.mu.data[int(ofs):int(ofs)+len(p)], p)
825 1 : if n != len(p) {
826 0 : panic("stuff")
827 : }
828 :
829 1 : return len(p), nil
830 : }
831 :
832 1 : func (f *memFile) Prefetch(offset int64, length int64) error { return nil }
833 1 : func (f *memFile) Preallocate(offset, length int64) error { return nil }
834 :
835 1 : func (f *memFile) Stat() (FileInfo, error) {
836 1 : f.n.mu.Lock()
837 1 : defer f.n.mu.Unlock()
838 1 : return &memFileInfo{
839 1 : name: f.name,
840 1 : size: int64(len(f.n.mu.data)),
841 1 : modTime: f.n.mu.modTime,
842 1 : isDir: f.n.isDir,
843 1 : }, nil
844 1 : }
845 :
846 1 : func (f *memFile) Sync() error {
847 1 : if f.fs == nil || !f.fs.crashable {
848 1 : return nil
849 1 : }
850 1 : f.fs.cloneMu.RLock()
851 1 : defer f.fs.cloneMu.RUnlock()
852 1 : f.fs.mu.Lock()
853 1 : defer f.fs.mu.Unlock()
854 1 : if f.n.isDir {
855 1 : f.n.syncedChildren = cloneChildren(f.n.children)
856 1 : } else {
857 1 : f.n.mu.Lock()
858 1 : f.n.mu.syncedData = append(f.n.mu.syncedData[:0], f.n.mu.data...)
859 1 : f.n.mu.Unlock()
860 1 : }
861 1 : return nil
862 : }
863 :
864 1 : func (f *memFile) SyncData() error {
865 1 : return f.Sync()
866 1 : }
867 :
868 0 : func (f *memFile) SyncTo(length int64) (fullSync bool, err error) {
869 0 : // NB: This SyncTo implementation lies, with its return values claiming it
870 0 : // synced the data up to `length`. When fullSync=false, SyncTo provides no
871 0 : // durability guarantees, so this can help surface bugs where we improperly
872 0 : // rely on SyncTo providing durability.
873 0 : return false, nil
874 0 : }
875 :
876 1 : func (f *memFile) Fd() uintptr {
877 1 : return InvalidFd
878 1 : }
879 :
880 : // Flush is a no-op and present only to prevent buffering at higher levels
881 : // (e.g. it prevents sstable.Writer from using a bufio.Writer).
882 0 : func (f *memFile) Flush() error {
883 0 : return nil
884 0 : }
885 :
886 : // memFileInfo implements os.FileInfo for a memFile.
887 : type memFileInfo struct {
888 : name string
889 : size int64
890 : modTime time.Time
891 : isDir bool
892 : }
893 :
894 : var _ os.FileInfo = (*memFileInfo)(nil)
895 :
896 0 : func (f *memFileInfo) Name() string {
897 0 : return f.name
898 0 : }
899 :
900 0 : func (f *memFileInfo) DeviceID() DeviceID {
901 0 : return DeviceID{}
902 0 : }
903 :
904 1 : func (f *memFileInfo) Size() int64 {
905 1 : return f.size
906 1 : }
907 :
908 0 : func (f *memFileInfo) Mode() os.FileMode {
909 0 : if f.isDir {
910 0 : return os.ModeDir | 0755
911 0 : }
912 0 : return 0755
913 : }
914 :
915 0 : func (f *memFileInfo) ModTime() time.Time {
916 0 : return f.modTime
917 0 : }
918 :
919 0 : func (f *memFileInfo) IsDir() bool {
920 0 : return f.isDir
921 0 : }
922 :
923 0 : func (f *memFileInfo) Sys() interface{} {
924 0 : return nil
925 0 : }
926 :
927 : type memFileLock struct {
928 : y *MemFS
929 : f File
930 : fullname string
931 : }
932 :
933 1 : func (l *memFileLock) Close() error {
934 1 : if l.y == nil {
935 0 : return nil
936 0 : }
937 1 : l.y.lockedFiles.Delete(l.fullname)
938 1 : l.y = nil
939 1 : return l.f.Close()
940 : }
|