Line data Source code
1 : // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package pebble
6 :
7 : import (
8 : "bytes"
9 : "context"
10 : "fmt"
11 : "io"
12 : "runtime/debug"
13 : "runtime/pprof"
14 : "sync"
15 : "sync/atomic"
16 : "unsafe"
17 :
18 : "github.com/cockroachdb/errors"
19 : "github.com/cockroachdb/pebble/internal/base"
20 : "github.com/cockroachdb/pebble/internal/invariants"
21 : "github.com/cockroachdb/pebble/internal/keyspan"
22 : "github.com/cockroachdb/pebble/internal/manifest"
23 : "github.com/cockroachdb/pebble/internal/private"
24 : "github.com/cockroachdb/pebble/objstorage"
25 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing"
26 : "github.com/cockroachdb/pebble/sstable"
27 : )
28 :
29 : var emptyIter = &errorIter{err: nil}
30 : var emptyKeyspanIter = &errorKeyspanIter{err: nil}
31 :
32 : // filteredAll is a singleton internalIterator implementation used when an
33 : // sstable does contain point keys, but all the keys are filtered by the active
34 : // PointKeyFilters set in the iterator's IterOptions.
35 : //
36 : // filteredAll implements filteredIter, ensuring the level iterator recognizes
37 : // when it may need to return file boundaries to keep the rangeDelIter open
38 : // during mergingIter operation.
39 : var filteredAll = &filteredAllKeysIter{errorIter: errorIter{err: nil}}
40 :
41 : var _ filteredIter = filteredAll
42 :
43 : type filteredAllKeysIter struct {
44 : errorIter
45 : }
46 :
47 1 : func (s *filteredAllKeysIter) MaybeFilteredKeys() bool {
48 1 : return true
49 1 : }
50 :
51 : var tableCacheLabels = pprof.Labels("pebble", "table-cache")
52 :
53 : // tableCacheOpts contains the db-specific fields
54 : // of a table cache. This is stored in the tableCacheContainer
55 : // along with the table cache.
56 : // NB: It is important to make sure that the fields in this
57 : // struct are read-only. Since the fields here are shared
58 : // by every single tableCacheShard, if non-read-only fields
59 : // are updated, we could have unnecessary evictions of those
60 : // fields, and the surrounding fields, from the CPU caches.
61 : type tableCacheOpts struct {
62 : // iterCount keeps track of how many iterators are open. It is used to keep
63 : // track of leaked iterators on a per-db level.
64 : iterCount *atomic.Int32
65 :
66 : loggerAndTracer LoggerAndTracer
67 : cacheID uint64
68 : objProvider objstorage.Provider
69 : opts sstable.ReaderOptions
70 : filterMetrics *sstable.FilterMetricsTracker
71 : sstStatsCollector *sstable.CategoryStatsCollector
72 : }
73 :
74 : // tableCacheContainer contains the table cache and
75 : // fields which are unique to the DB.
76 : type tableCacheContainer struct {
77 : tableCache *TableCache
78 :
79 : // dbOpts contains fields relevant to the table cache
80 : // which are unique to each DB.
81 : dbOpts tableCacheOpts
82 : }
83 :
84 : // newTableCacheContainer will panic if the underlying cache in the table cache
85 : // doesn't match Options.Cache.
86 : func newTableCacheContainer(
87 : tc *TableCache,
88 : cacheID uint64,
89 : objProvider objstorage.Provider,
90 : opts *Options,
91 : size int,
92 : sstStatsCollector *sstable.CategoryStatsCollector,
93 1 : ) *tableCacheContainer {
94 1 : // We will release a ref to the table cache acquired here when tableCacheContainer.close is called.
95 1 : if tc != nil {
96 0 : if tc.cache != opts.Cache {
97 0 : panic("pebble: underlying cache for the table cache and db are different")
98 : }
99 0 : tc.Ref()
100 1 : } else {
101 1 : // NewTableCache should create a ref to tc which the container should
102 1 : // drop whenever it is closed.
103 1 : tc = NewTableCache(opts.Cache, opts.Experimental.TableCacheShards, size)
104 1 : }
105 :
106 1 : t := &tableCacheContainer{}
107 1 : t.tableCache = tc
108 1 : t.dbOpts.loggerAndTracer = opts.LoggerAndTracer
109 1 : t.dbOpts.cacheID = cacheID
110 1 : t.dbOpts.objProvider = objProvider
111 1 : t.dbOpts.opts = opts.MakeReaderOptions()
112 1 : t.dbOpts.filterMetrics = &sstable.FilterMetricsTracker{}
113 1 : t.dbOpts.iterCount = new(atomic.Int32)
114 1 : t.dbOpts.sstStatsCollector = sstStatsCollector
115 1 : return t
116 : }
117 :
118 : // Before calling close, make sure that there will be no further need
119 : // to access any of the files associated with the store.
120 1 : func (c *tableCacheContainer) close() error {
121 1 : // We want to do some cleanup work here. Check for leaked iterators
122 1 : // by the DB using this container. Note that we'll still perform cleanup
123 1 : // below in the case that there are leaked iterators.
124 1 : var err error
125 1 : if v := c.dbOpts.iterCount.Load(); v > 0 {
126 0 : err = errors.Errorf("leaked iterators: %d", errors.Safe(v))
127 0 : }
128 :
129 : // Release nodes here.
130 1 : for _, shard := range c.tableCache.shards {
131 1 : if shard != nil {
132 1 : shard.removeDB(&c.dbOpts)
133 1 : }
134 : }
135 1 : return firstError(err, c.tableCache.Unref())
136 : }
137 :
138 : func (c *tableCacheContainer) newIters(
139 : ctx context.Context,
140 : file *manifest.FileMetadata,
141 : opts *IterOptions,
142 : internalOpts internalIterOpts,
143 1 : ) (internalIterator, keyspan.FragmentIterator, error) {
144 1 : return c.tableCache.getShard(file.FileBacking.DiskFileNum).newIters(ctx, file, opts, internalOpts, &c.dbOpts)
145 1 : }
146 :
147 : func (c *tableCacheContainer) newRangeKeyIter(
148 : file *manifest.FileMetadata, opts keyspan.SpanIterOptions,
149 1 : ) (keyspan.FragmentIterator, error) {
150 1 : return c.tableCache.getShard(file.FileBacking.DiskFileNum).newRangeKeyIter(file, opts, &c.dbOpts)
151 1 : }
152 :
153 : // getTableProperties returns the properties associated with the backing physical
154 : // table if the input metadata belongs to a virtual sstable.
155 0 : func (c *tableCacheContainer) getTableProperties(file *fileMetadata) (*sstable.Properties, error) {
156 0 : return c.tableCache.getShard(file.FileBacking.DiskFileNum).getTableProperties(file, &c.dbOpts)
157 0 : }
158 :
159 1 : func (c *tableCacheContainer) evict(fileNum base.DiskFileNum) {
160 1 : c.tableCache.getShard(fileNum).evict(fileNum, &c.dbOpts, false)
161 1 : }
162 :
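// metrics aggregates the per-shard node counts and hit/miss counters into
// CacheMetrics (estimating memory use as the node count times the size of an
// sstable.Reader), and returns the DB's filter metrics alongside them.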
163 1 : func (c *tableCacheContainer) metrics() (CacheMetrics, FilterMetrics) {
164 1 : var m CacheMetrics
165 1 : for i := range c.tableCache.shards {
166 1 : s := c.tableCache.shards[i]
167 1 : s.mu.RLock()
168 1 : m.Count += int64(len(s.mu.nodes))
169 1 : s.mu.RUnlock()
170 1 : m.Hits += s.hits.Load()
171 1 : m.Misses += s.misses.Load()
172 1 : }
173 1 : m.Size = m.Count * int64(unsafe.Sizeof(sstable.Reader{}))
174 1 : f := c.dbOpts.filterMetrics.Load()
175 1 : return m, f
176 : }
177 :
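// estimateSize returns the estimated on-disk size of the table's data
// overlapping the key range [lower, upper], dispatching to the virtual or
// physical reader as appropriate.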
178 : func (c *tableCacheContainer) estimateSize(
179 : meta *fileMetadata, lower, upper []byte,
180 1 : ) (size uint64, err error) {
181 1 : if meta.Virtual {
182 1 : err = c.withVirtualReader(
183 1 : meta.VirtualMeta(),
184 1 : func(r sstable.VirtualReader) (err error) {
185 1 : size, err = r.EstimateDiskUsage(lower, upper)
186 1 : return err
187 1 : },
188 : )
189 1 : } else {
190 1 : err = c.withReader(
191 1 : meta.PhysicalMeta(),
192 1 : func(r *sstable.Reader) (err error) {
193 1 : size, err = r.EstimateDiskUsage(lower, upper)
194 1 : return err
195 1 : },
196 : )
197 : }
198 1 : if err != nil {
199 0 : return 0, err
200 0 : }
201 1 : return size, nil
202 : }
203 :
204 : // createCommonReader creates a Reader for this file. isForeign, if true for
205 : // virtual sstables, is passed into the vSSTable reader so its iterators can
206 : // collapse obsolete points accordingly.
207 : func createCommonReader(
208 : v *tableCacheValue, file *fileMetadata, isForeign bool,
209 1 : ) sstable.CommonReader {
210 1 : // TODO(bananabrick): We suffer an allocation if file is a virtual sstable.
211 1 : var cr sstable.CommonReader = v.reader
212 1 : if file.Virtual {
213 1 : virtualReader := sstable.MakeVirtualReader(
214 1 : v.reader, file.VirtualMeta(), isForeign,
215 1 : )
216 1 : cr = &virtualReader
217 1 : }
218 1 : return cr
219 : }
220 :
221 : func (c *tableCacheContainer) withCommonReader(
222 : meta *fileMetadata, fn func(sstable.CommonReader) error,
223 1 : ) error {
224 1 : s := c.tableCache.getShard(meta.FileBacking.DiskFileNum)
225 1 : v := s.findNode(meta, &c.dbOpts)
226 1 : defer s.unrefValue(v)
227 1 : if v.err != nil {
228 0 : return v.err
229 0 : }
230 1 : provider := c.dbOpts.objProvider
231 1 : objMeta, err := provider.Lookup(fileTypeTable, meta.FileBacking.DiskFileNum)
232 1 : if err != nil {
233 0 : return err
234 0 : }
235 1 : return fn(createCommonReader(v, meta, provider.IsSharedForeign(objMeta)))
236 : }
237 :
238 1 : func (c *tableCacheContainer) withReader(meta physicalMeta, fn func(*sstable.Reader) error) error {
239 1 : s := c.tableCache.getShard(meta.FileBacking.DiskFileNum)
240 1 : v := s.findNode(meta.FileMetadata, &c.dbOpts)
241 1 : defer s.unrefValue(v)
242 1 : if v.err != nil {
243 0 : return v.err
244 0 : }
245 1 : return fn(v.reader)
246 : }
247 :
248 : // withVirtualReader fetches a VirtualReader associated with a virtual sstable.
249 : func (c *tableCacheContainer) withVirtualReader(
250 : meta virtualMeta, fn func(sstable.VirtualReader) error,
251 1 : ) error {
252 1 : s := c.tableCache.getShard(meta.FileBacking.DiskFileNum)
253 1 : v := s.findNode(meta.FileMetadata, &c.dbOpts)
254 1 : defer s.unrefValue(v)
255 1 : if v.err != nil {
256 0 : return v.err
257 0 : }
258 1 : provider := c.dbOpts.objProvider
259 1 : objMeta, err := provider.Lookup(fileTypeTable, meta.FileBacking.DiskFileNum)
260 1 : if err != nil {
261 0 : return err
262 0 : }
263 1 : return fn(sstable.MakeVirtualReader(v.reader, meta, provider.IsSharedForeign(objMeta)))
264 : }
265 :
266 1 : func (c *tableCacheContainer) iterCount() int64 {
267 1 : return int64(c.dbOpts.iterCount.Load())
268 1 : }
269 :
270 : // TableCache is a shareable cache for open sstables.
271 : type TableCache struct {
272 : refs atomic.Int64
273 :
274 : cache *Cache
275 : shards []*tableCacheShard
276 : }
277 :
278 : // Ref adds a reference to the table cache. Once tableCache.init returns,
279 : // the table cache only remains valid if there is at least one reference
280 : // to it.
281 0 : func (c *TableCache) Ref() {
282 0 : v := c.refs.Add(1)
283 0 : // We don't want the reference count to ever go from 0 -> 1,
284 0 : // because a reference count of 0 implies that we've closed the cache.
285 0 : if v <= 1 {
286 0 : panic(fmt.Sprintf("pebble: inconsistent reference count: %d", v))
287 : }
288 : }
289 :
290 : // Unref removes a reference to the table cache.
291 1 : func (c *TableCache) Unref() error {
292 1 : v := c.refs.Add(-1)
293 1 : switch {
294 0 : case v < 0:
295 0 : panic(fmt.Sprintf("pebble: inconsistent reference count: %d", v))
296 1 : case v == 0:
297 1 : var err error
298 1 : for i := range c.shards {
299 1 : // The cache shard is not allocated yet, nothing to close
300 1 : if c.shards[i] == nil {
301 0 : continue
302 : }
303 1 : err = firstError(err, c.shards[i].Close())
304 : }
305 :
306 : // Unref the cache which we create a reference to when the tableCache
307 : // is first instantiated.
308 1 : c.cache.Unref()
309 1 : return err
310 : }
311 0 : return nil
312 : }
313 :
314 : // NewTableCache will create a reference to the table cache. It is the caller's responsibility
315 : // to call tableCache.Unref if they will no longer hold a reference to the table cache.
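//
// A rough usage sketch (not from this file; it assumes the exported
// Options.Cache and Options.TableCache fields): a single TableCache can be
// shared by multiple DBs that use the same block cache, with the creator
// dropping its reference via Unref once the DBs hold their own references.
//
//	cache := pebble.NewCache(128 << 20)
//	defer cache.Unref()
//	tc := pebble.NewTableCache(cache, 8 /* shards */, 1000 /* table capacity */)
//	defer tc.Unref()
//	db1, err := pebble.Open("db1", &pebble.Options{Cache: cache, TableCache: tc})
//	// ... handle err, open other DBs with the same Cache and TableCache, etc.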
316 1 : func NewTableCache(cache *Cache, numShards int, size int) *TableCache {
317 1 : if size == 0 {
318 0 : panic("pebble: cannot create a table cache of size 0")
319 1 : } else if numShards == 0 {
320 0 : panic("pebble: cannot create a table cache with 0 shards")
321 : }
322 :
323 1 : c := &TableCache{}
324 1 : c.cache = cache
325 1 : c.cache.Ref()
326 1 :
327 1 : c.shards = make([]*tableCacheShard, numShards)
328 1 : for i := range c.shards {
329 1 : c.shards[i] = &tableCacheShard{}
330 1 : c.shards[i].init(size / len(c.shards))
331 1 : }
332 :
333 : // Hold a ref to the cache here.
334 1 : c.refs.Store(1)
335 1 :
336 1 : return c
337 : }
338 :
339 1 : func (c *TableCache) getShard(fileNum base.DiskFileNum) *tableCacheShard {
340 1 : return c.shards[uint64(fileNum.FileNum())%uint64(len(c.shards))]
341 1 : }
342 :
343 : type tableCacheKey struct {
344 : cacheID uint64
345 : fileNum base.DiskFileNum
346 : }
347 :
348 : type tableCacheShard struct {
349 : hits atomic.Int64
350 : misses atomic.Int64
351 : iterCount atomic.Int32
352 :
353 : size int
354 :
355 : mu struct {
356 : sync.RWMutex
357 : nodes map[tableCacheKey]*tableCacheNode
358 : // The iters map is only created and populated in race builds.
359 : iters map[io.Closer][]byte
360 :
361 : handHot *tableCacheNode
362 : handCold *tableCacheNode
363 : handTest *tableCacheNode
364 :
365 : coldTarget int
366 : sizeHot int
367 : sizeCold int
368 : sizeTest int
369 : }
370 : releasing sync.WaitGroup
371 : releasingCh chan *tableCacheValue
372 : releaseLoopExit sync.WaitGroup
373 : }
374 :
375 1 : func (c *tableCacheShard) init(size int) {
376 1 : c.size = size
377 1 :
378 1 : c.mu.nodes = make(map[tableCacheKey]*tableCacheNode)
379 1 : c.mu.coldTarget = size
380 1 : c.releasingCh = make(chan *tableCacheValue, 100)
381 1 : c.releaseLoopExit.Add(1)
382 1 : go c.releaseLoop()
383 1 :
384 1 : if invariants.RaceEnabled {
385 0 : c.mu.iters = make(map[io.Closer][]byte)
386 0 : }
387 : }
388 :
389 1 : func (c *tableCacheShard) releaseLoop() {
390 1 : pprof.Do(context.Background(), tableCacheLabels, func(context.Context) {
391 1 : defer c.releaseLoopExit.Done()
392 1 : for v := range c.releasingCh {
393 1 : v.release(c)
394 1 : }
395 : })
396 : }
397 :
398 : // checkAndIntersectFilters checks the specific table and block property filters
399 : // for intersection with any available table and block-level properties. Returns
400 : // true for ok if this table should be read by this iterator.
401 : func (c *tableCacheShard) checkAndIntersectFilters(
402 : v *tableCacheValue,
403 : tableFilter func(userProps map[string]string) bool,
404 : blockPropertyFilters []BlockPropertyFilter,
405 : boundLimitedFilter sstable.BoundLimitedBlockPropertyFilter,
406 1 : ) (ok bool, filterer *sstable.BlockPropertiesFilterer, err error) {
407 1 : if tableFilter != nil &&
408 1 : !tableFilter(v.reader.Properties.UserProperties) {
409 0 : return false, nil, nil
410 0 : }
411 :
412 1 : if boundLimitedFilter != nil || len(blockPropertyFilters) > 0 {
413 1 : filterer, err = sstable.IntersectsTable(
414 1 : blockPropertyFilters,
415 1 : boundLimitedFilter,
416 1 : v.reader.Properties.UserProperties,
417 1 : )
418 1 : // NB: IntersectsTable will return a nil filterer if the table-level
419 1 : // properties indicate there's no intersection with the provided filters.
420 1 : if filterer == nil || err != nil {
421 1 : return false, nil, err
422 1 : }
423 : }
424 1 : return true, filterer, nil
425 : }
426 :
427 : func (c *tableCacheShard) newIters(
428 : ctx context.Context,
429 : file *manifest.FileMetadata,
430 : opts *IterOptions,
431 : internalOpts internalIterOpts,
432 : dbOpts *tableCacheOpts,
433 1 : ) (internalIterator, keyspan.FragmentIterator, error) {
434 1 : // TODO(sumeer): constructing the Reader should also use a plumbed context,
435 1 : // since parts of the sstable are read during the construction. The Reader
436 1 : // should not remember that context since the Reader can be long-lived.
437 1 :
438 1 : // Calling findNode gives us the responsibility of decrementing v's
439 1 : // refCount. If opening the underlying table resulted in error, then we
440 1 : // decrement this straight away. Otherwise, we pass that responsibility to
441 1 : // the sstable iterator, which decrements when it is closed.
442 1 : v := c.findNode(file, dbOpts)
443 1 : if v.err != nil {
444 0 : defer c.unrefValue(v)
445 0 : return nil, nil, v.err
446 0 : }
447 :
448 1 : hideObsoletePoints := false
449 1 : var pointKeyFilters []BlockPropertyFilter
450 1 : if opts != nil {
451 1 : // This code is appending (at most one filter) in-place to
452 1 : // opts.PointKeyFilters even though the slice is shared for iterators in
453 1 : // the same iterator tree. This is acceptable since all the following
454 1 : // properties are true:
455 1 : // - The iterator tree is single threaded, so the shared backing for the
456 1 : // slice is being mutated in a single threaded manner.
457 1 : // - Each shallow copy of the slice has its own notion of length.
458 1 : // - The appended element is always the obsoleteKeyBlockPropertyFilter
459 1 : // struct, which is stateless, so overwriting that struct when creating
460 1 : // one sstable iterator is harmless to other sstable iterators that are
461 1 : // relying on that struct.
462 1 : //
463 1 : // An alternative would be to have different slices for different sstable
464 1 : // iterators, but that requires more work to avoid allocations.
465 1 : //
466 1 : // TODO(bilal): for compaction reads of foreign sstables, we do hide
467 1 : // obsolete points (see sstable.Reader.newCompactionIter) but we don't
468 1 : // apply the obsolete block property filter. We could optimize this by
469 1 : // applying the filter.
470 1 : hideObsoletePoints, pointKeyFilters =
471 1 : v.reader.TryAddBlockPropertyFilterForHideObsoletePoints(
472 1 : opts.snapshotForHideObsoletePoints, file.LargestSeqNum, opts.PointKeyFilters)
473 1 : }
474 1 : ok := true
475 1 : var filterer *sstable.BlockPropertiesFilterer
476 1 : var err error
477 1 : if opts != nil {
478 1 : ok, filterer, err = c.checkAndIntersectFilters(v, opts.TableFilter,
479 1 : pointKeyFilters, internalOpts.boundLimitedFilter)
480 1 : }
481 1 : if err != nil {
482 0 : c.unrefValue(v)
483 0 : return nil, nil, err
484 0 : }
485 :
486 1 : provider := dbOpts.objProvider
487 1 : // Check if this file is a foreign file.
488 1 : objMeta, err := provider.Lookup(fileTypeTable, file.FileBacking.DiskFileNum)
489 1 : if err != nil {
490 0 : return nil, nil, err
491 0 : }
492 :
493 : // Note: This suffers an allocation for virtual sstables.
494 1 : cr := createCommonReader(v, file, provider.IsSharedForeign(objMeta))
495 1 :
496 1 : // NB: range-del iterator does not maintain a reference to the table, nor
497 1 : // does it need to read from it after creation.
498 1 : rangeDelIter, err := cr.NewRawRangeDelIter()
499 1 : if err != nil {
500 0 : c.unrefValue(v)
501 0 : return nil, nil, err
502 0 : }
503 :
504 : // Assert expected bounds in tests.
505 1 : if invariants.Enabled && rangeDelIter != nil {
506 1 : cmp := base.DefaultComparer.Compare
507 1 : if dbOpts.opts.Comparer != nil {
508 1 : cmp = dbOpts.opts.Comparer.Compare
509 1 : }
510 : // TODO(radu): we should be using AssertBounds, but it currently fails in
511 : // some cases (#3167).
512 1 : rangeDelIter = keyspan.AssertUserKeyBounds(
513 1 : rangeDelIter, file.SmallestPointKey.UserKey, file.LargestPointKey.UserKey, cmp,
514 1 : )
515 : }
516 :
517 1 : if !ok {
518 1 : c.unrefValue(v)
519 1 : // Return an empty iterator. This iterator has no mutable state, so
520 1 : // using a singleton is fine.
521 1 : // NB: We still return the potentially non-empty rangeDelIter. This
522 1 : // ensures the iterator observes the file's range deletions even if the
523 1 : // block property filters exclude all the file's point keys. The range
524 1 : // deletions may still delete keys lower in the LSM in files that DO
525 1 : // match the active filters.
526 1 : //
527 1 : // The point iterator returned must implement the filteredIter
528 1 : // interface, so that the level iterator surfaces file boundaries when
529 1 : // range deletions are present.
530 1 : return filteredAll, rangeDelIter, err
531 1 : }
532 :
533 1 : var iter sstable.Iterator
534 1 : useFilter := true
535 1 : if opts != nil {
536 1 : useFilter = manifest.LevelToInt(opts.level) != 6 || opts.UseL6Filters
537 1 : ctx = objiotracing.WithLevel(ctx, manifest.LevelToInt(opts.level))
538 1 : }
539 1 : tableFormat, err := v.reader.TableFormat()
540 1 : if err != nil {
541 0 : return nil, nil, err
542 0 : }
543 1 : var rp sstable.ReaderProvider
544 1 : if tableFormat >= sstable.TableFormatPebblev3 && v.reader.Properties.NumValueBlocks > 0 {
545 1 : rp = &tableCacheShardReaderProvider{c: c, file: file, dbOpts: dbOpts}
546 1 : }
547 :
548 1 : if objMeta.IsShared() && v.reader.Properties.GlobalSeqNum != 0 {
549 1 : if tableFormat < sstable.TableFormatPebblev4 {
550 0 : return nil, nil, errors.New("pebble: shared ingested sstable has a lower table format than expected")
551 0 : }
552 : // The table is shared and ingested.
553 1 : hideObsoletePoints = true
554 : }
555 1 : var categoryAndQoS sstable.CategoryAndQoS
556 1 : if opts != nil {
557 1 : categoryAndQoS = opts.CategoryAndQoS
558 1 : }
559 1 : if internalOpts.bytesIterated != nil {
560 1 : iter, err = cr.NewCompactionIter(
561 1 : internalOpts.bytesIterated, categoryAndQoS, dbOpts.sstStatsCollector, rp,
562 1 : internalOpts.bufferPool)
563 1 : } else {
564 1 : iter, err = cr.NewIterWithBlockPropertyFiltersAndContextEtc(
565 1 : ctx, opts.GetLowerBound(), opts.GetUpperBound(), filterer, hideObsoletePoints, useFilter,
566 1 : internalOpts.stats, categoryAndQoS, dbOpts.sstStatsCollector, rp)
567 1 : }
568 1 : if err != nil {
569 0 : if rangeDelIter != nil {
570 0 : _ = rangeDelIter.Close()
571 0 : }
572 0 : c.unrefValue(v)
573 0 : return nil, nil, err
574 : }
575 : // NB: v.closeHook takes responsibility for calling unrefValue(v) here. Take
576 : // care to avoid introducing an allocation here by adding a closure.
577 1 : iter.SetCloseHook(v.closeHook)
578 1 :
579 1 : c.iterCount.Add(1)
580 1 : dbOpts.iterCount.Add(1)
581 1 : if invariants.RaceEnabled {
582 0 : c.mu.Lock()
583 0 : c.mu.iters[iter] = debug.Stack()
584 0 : c.mu.Unlock()
585 0 : }
586 1 : return iter, rangeDelIter, nil
587 : }
588 :
589 : func (c *tableCacheShard) newRangeKeyIter(
590 : file *manifest.FileMetadata, opts keyspan.SpanIterOptions, dbOpts *tableCacheOpts,
591 1 : ) (keyspan.FragmentIterator, error) {
592 1 : // Calling findNode gives us the responsibility of decrementing v's
593 1 : // refCount. If opening the underlying table resulted in error, then we
594 1 : // decrement this straight away. Otherwise, we pass that responsibility to
595 1 : // the sstable iterator, which decrements when it is closed.
596 1 : v := c.findNode(file, dbOpts)
597 1 : if v.err != nil {
598 0 : defer c.unrefValue(v)
599 0 : return nil, v.err
600 0 : }
601 :
602 1 : ok := true
603 1 : var err error
604 1 : // Don't filter a table's range keys if the file contains RANGEKEYDELs.
605 1 : // The RANGEKEYDELs may delete range keys in other levels. Skipping the
606 1 : // file's range key blocks may surface deleted range keys below. This is
607 1 : // done here, rather than deferring to the block-property collector in order
608 1 : // to maintain parity with point keys and the treatment of RANGEDELs.
609 1 : if v.reader.Properties.NumRangeKeyDels == 0 {
610 1 : ok, _, err = c.checkAndIntersectFilters(v, nil, opts.RangeKeyFilters, nil)
611 1 : }
612 1 : if err != nil {
613 0 : c.unrefValue(v)
614 0 : return nil, err
615 0 : }
616 1 : if !ok {
617 0 : c.unrefValue(v)
618 0 : // Return the empty iterator. This iterator has no mutable state, so
619 0 : // using a singleton is fine.
620 0 : return emptyKeyspanIter, err
621 0 : }
622 :
623 1 : var iter keyspan.FragmentIterator
624 1 : if file.Virtual {
625 1 : provider := dbOpts.objProvider
626 1 : var objMeta objstorage.ObjectMetadata
627 1 : objMeta, err = provider.Lookup(fileTypeTable, file.FileBacking.DiskFileNum)
628 1 : if err == nil {
629 1 : virtualReader := sstable.MakeVirtualReader(
630 1 : v.reader, file.VirtualMeta(), provider.IsSharedForeign(objMeta),
631 1 : )
632 1 : iter, err = virtualReader.NewRawRangeKeyIter()
633 1 : }
634 1 : } else {
635 1 : iter, err = v.reader.NewRawRangeKeyIter()
636 1 : }
637 :
638 : // iter is a block iter that holds the entire value of the block in memory.
639 : // No need to hold onto a ref of the cache value.
640 1 : c.unrefValue(v)
641 1 :
642 1 : if err != nil {
643 0 : return nil, err
644 0 : }
645 :
646 1 : if iter == nil {
647 1 : // NewRawRangeKeyIter can return nil even if there's no error. However,
648 1 : // the keyspan.LevelIter expects a non-nil iterator if err is nil.
649 1 : return emptyKeyspanIter, nil
650 1 : }
651 :
652 1 : return iter, nil
653 : }
654 :
655 : type tableCacheShardReaderProvider struct {
656 : c *tableCacheShard
657 : file *manifest.FileMetadata
658 : dbOpts *tableCacheOpts
659 : v *tableCacheValue
660 : }
661 :
662 : var _ sstable.ReaderProvider = &tableCacheShardReaderProvider{}
663 :
664 : // GetReader implements sstable.ReaderProvider. Note that it is not the
665 : // responsibility of tableCacheShardReaderProvider to ensure that the file
666 : // continues to exist. The ReaderProvider is used in iterators where the
667 : // top-level iterator is pinning the read state and preventing the files from
668 : // being deleted.
669 : //
670 : // The caller must call tableCacheShardReaderProvider.Close.
671 : //
672 : // Note that currently the Reader returned here is only used to read value
673 : // blocks. This reader shouldn't be used for other purposes like reading keys
674 : // outside of virtual sstable bounds.
675 : //
676 : // TODO(bananabrick): We could return a wrapper over the Reader to ensure
677 : // that the reader isn't used for other purposes.
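//
// A minimal call-site sketch (hypothetical caller): each successful GetReader
// pins a table cache node, so it must be paired with a Close.
//
//	r, err := rp.GetReader()
//	if err != nil {
//		return err
//	}
//	defer rp.Close()
//	// ... read value blocks through r ...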
678 1 : func (rp *tableCacheShardReaderProvider) GetReader() (*sstable.Reader, error) {
679 1 : // Calling findNode gives us the responsibility of decrementing v's
680 1 : // refCount.
681 1 : v := rp.c.findNode(rp.file, rp.dbOpts)
682 1 : if v.err != nil {
683 0 : defer rp.c.unrefValue(v)
684 0 : return nil, v.err
685 0 : }
686 1 : rp.v = v
687 1 : return v.reader, nil
688 : }
689 :
690 : // Close implements sstable.ReaderProvider.
691 1 : func (rp *tableCacheShardReaderProvider) Close() {
692 1 : rp.c.unrefValue(rp.v)
693 1 : rp.v = nil
694 1 : }
695 :
696 : // getTableProperties returns the sst table properties for the target file.
697 : func (c *tableCacheShard) getTableProperties(
698 : file *fileMetadata, dbOpts *tableCacheOpts,
699 0 : ) (*sstable.Properties, error) {
700 0 : // Calling findNode gives us the responsibility of decrementing v's refCount here
701 0 : v := c.findNode(file, dbOpts)
702 0 : defer c.unrefValue(v)
703 0 :
704 0 : if v.err != nil {
705 0 : return nil, v.err
706 0 : }
707 0 : return &v.reader.Properties, nil
708 : }
709 :
710 : // releaseNode releases a node from the tableCacheShard.
711 : //
712 : // c.mu must be held when calling this.
713 0 : func (c *tableCacheShard) releaseNode(n *tableCacheNode) {
714 0 : c.unlinkNode(n)
715 0 : c.clearNode(n)
716 0 : }
717 :
718 : // unlinkNode removes a node from the tableCacheShard, leaving the shard
719 : // reference in place.
720 : //
721 : // c.mu must be held when calling this.
722 1 : func (c *tableCacheShard) unlinkNode(n *tableCacheNode) {
723 1 : key := tableCacheKey{n.cacheID, n.fileNum}
724 1 : delete(c.mu.nodes, key)
725 1 :
726 1 : switch n.ptype {
727 1 : case tableCacheNodeHot:
728 1 : c.mu.sizeHot--
729 1 : case tableCacheNodeCold:
730 1 : c.mu.sizeCold--
731 1 : case tableCacheNodeTest:
732 1 : c.mu.sizeTest--
733 : }
734 :
735 1 : if n == c.mu.handHot {
736 1 : c.mu.handHot = c.mu.handHot.prev()
737 1 : }
738 1 : if n == c.mu.handCold {
739 1 : c.mu.handCold = c.mu.handCold.prev()
740 1 : }
741 1 : if n == c.mu.handTest {
742 1 : c.mu.handTest = c.mu.handTest.prev()
743 1 : }
744 :
745 1 : if n.unlink() == n {
746 1 : // This was the last entry in the cache.
747 1 : c.mu.handHot = nil
748 1 : c.mu.handCold = nil
749 1 : c.mu.handTest = nil
750 1 : }
751 :
752 1 : n.links.prev = nil
753 1 : n.links.next = nil
754 : }
755 :
756 1 : func (c *tableCacheShard) clearNode(n *tableCacheNode) {
757 1 : if v := n.value; v != nil {
758 1 : n.value = nil
759 1 : c.unrefValue(v)
760 1 : }
761 : }
762 :
763 : // unrefValue decrements the reference count for the specified value, releasing
764 : // it if the reference count fell to 0. Note that the value has a reference if
765 : // it is present in tableCacheShard.mu.nodes, so a reference count of 0 means
766 : // the node has already been removed from that map.
767 1 : func (c *tableCacheShard) unrefValue(v *tableCacheValue) {
768 1 : if v.refCount.Add(-1) == 0 {
769 1 : c.releasing.Add(1)
770 1 : c.releasingCh <- v
771 1 : }
772 : }
773 :
774 : // findNode returns the node for the table with the given file number, creating
775 : // that node if it didn't already exist. The caller is responsible for
776 : // decrementing the returned node's refCount.
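//
// A typical call site mirrors withReader in this file: pair every findNode
// with an unrefValue once the reader is no longer needed.
//
//	v := s.findNode(meta, dbOpts)
//	defer s.unrefValue(v)
//	if v.err != nil {
//		return v.err
//	}
//	// ... use v.reader ...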
777 1 : func (c *tableCacheShard) findNode(meta *fileMetadata, dbOpts *tableCacheOpts) *tableCacheValue {
778 1 : v := c.findNodeInternal(meta, dbOpts)
779 1 :
780 1 : // Loading a file before its global sequence number is known (e.g.,
781 1 : // during ingest before entering the commit pipeline) can pollute
782 1 : // the cache with incorrect state. In invariant builds, verify
783 1 : // that the global sequence number of the returned reader matches.
784 1 : if invariants.Enabled {
785 1 : if v.reader != nil && meta.LargestSeqNum == meta.SmallestSeqNum &&
786 1 : v.reader.Properties.GlobalSeqNum != meta.SmallestSeqNum {
787 0 : panic(errors.AssertionFailedf("file %s loaded from table cache with the wrong global sequence number %d",
788 0 : meta, v.reader.Properties.GlobalSeqNum))
789 : }
790 : }
791 1 : return v
792 : }
793 :
794 : func (c *tableCacheShard) findNodeInternal(
795 : meta *fileMetadata, dbOpts *tableCacheOpts,
796 1 : ) *tableCacheValue {
797 1 : if refs := meta.Refs(); refs <= 0 {
798 0 : panic(errors.AssertionFailedf("attempting to load file %s with refs=%d from table cache",
799 0 : meta, refs))
800 : }
801 : // Fast-path for a hit in the cache.
802 1 : c.mu.RLock()
803 1 : key := tableCacheKey{dbOpts.cacheID, meta.FileBacking.DiskFileNum}
804 1 : if n := c.mu.nodes[key]; n != nil && n.value != nil {
805 1 : // Fast-path hit.
806 1 : //
807 1 : // The caller is responsible for decrementing the refCount.
808 1 : v := n.value
809 1 : v.refCount.Add(1)
810 1 : c.mu.RUnlock()
811 1 : n.referenced.Store(true)
812 1 : c.hits.Add(1)
813 1 : <-v.loaded
814 1 : return v
815 1 : }
816 1 : c.mu.RUnlock()
817 1 :
818 1 : c.mu.Lock()
819 1 :
820 1 : n := c.mu.nodes[key]
821 1 : switch {
822 1 : case n == nil:
823 1 : // Slow-path miss of a non-existent node.
824 1 : n = &tableCacheNode{
825 1 : fileNum: meta.FileBacking.DiskFileNum,
826 1 : ptype: tableCacheNodeCold,
827 1 : }
828 1 : c.addNode(n, dbOpts)
829 1 : c.mu.sizeCold++
830 :
831 1 : case n.value != nil:
832 1 : // Slow-path hit of a hot or cold node.
833 1 : //
834 1 : // The caller is responsible for decrementing the refCount.
835 1 : v := n.value
836 1 : v.refCount.Add(1)
837 1 : n.referenced.Store(true)
838 1 : c.hits.Add(1)
839 1 : c.mu.Unlock()
840 1 : <-v.loaded
841 1 : return v
842 :
843 1 : default:
844 1 : // Slow-path miss of a test node.
845 1 : c.unlinkNode(n)
846 1 : c.mu.coldTarget++
847 1 : if c.mu.coldTarget > c.size {
848 1 : c.mu.coldTarget = c.size
849 1 : }
850 :
851 1 : n.referenced.Store(false)
852 1 : n.ptype = tableCacheNodeHot
853 1 : c.addNode(n, dbOpts)
854 1 : c.mu.sizeHot++
855 : }
856 :
857 1 : c.misses.Add(1)
858 1 :
859 1 : v := &tableCacheValue{
860 1 : loaded: make(chan struct{}),
861 1 : }
862 1 : v.refCount.Store(2)
863 1 : // Cache the closure invoked when an iterator is closed. This avoids an
864 1 : // allocation on every call to newIters.
865 1 : v.closeHook = func(i sstable.Iterator) error {
866 1 : if invariants.RaceEnabled {
867 0 : c.mu.Lock()
868 0 : delete(c.mu.iters, i)
869 0 : c.mu.Unlock()
870 0 : }
871 1 : c.unrefValue(v)
872 1 : c.iterCount.Add(-1)
873 1 : dbOpts.iterCount.Add(-1)
874 1 : return nil
875 : }
876 1 : n.value = v
877 1 :
878 1 : c.mu.Unlock()
879 1 :
880 1 : // Note adding to the cache lists must complete before we begin loading the
881 1 : // table, as a failure during load will result in the node being unlinked.
882 1 : pprof.Do(context.Background(), tableCacheLabels, func(context.Context) {
883 1 : v.load(
884 1 : loadInfo{
885 1 : backingFileNum: meta.FileBacking.DiskFileNum,
886 1 : smallestSeqNum: meta.SmallestSeqNum,
887 1 : largestSeqNum: meta.LargestSeqNum,
888 1 : }, c, dbOpts)
889 1 : })
890 1 : return v
891 : }
892 :
893 1 : func (c *tableCacheShard) addNode(n *tableCacheNode, dbOpts *tableCacheOpts) {
894 1 : c.evictNodes()
895 1 : n.cacheID = dbOpts.cacheID
896 1 : key := tableCacheKey{n.cacheID, n.fileNum}
897 1 : c.mu.nodes[key] = n
898 1 :
899 1 : n.links.next = n
900 1 : n.links.prev = n
901 1 : if c.mu.handHot == nil {
902 1 : // First element.
903 1 : c.mu.handHot = n
904 1 : c.mu.handCold = n
905 1 : c.mu.handTest = n
906 1 : } else {
907 1 : c.mu.handHot.link(n)
908 1 : }
909 :
910 1 : if c.mu.handCold == c.mu.handHot {
911 1 : c.mu.handCold = c.mu.handCold.prev()
912 1 : }
913 : }
914 :
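// evictNodes and the runHand* methods below implement what appears to be a
// CLOCK-Pro style replacement policy: nodes cycle through hot, cold, and test
// states, three hands (handHot, handCold, handTest) sweep the circular list,
// and coldTarget adapts the share of capacity reserved for cold entries.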
915 1 : func (c *tableCacheShard) evictNodes() {
916 1 : for c.size <= c.mu.sizeHot+c.mu.sizeCold && c.mu.handCold != nil {
917 1 : c.runHandCold()
918 1 : }
919 : }
920 :
921 1 : func (c *tableCacheShard) runHandCold() {
922 1 : n := c.mu.handCold
923 1 : if n.ptype == tableCacheNodeCold {
924 1 : if n.referenced.Load() {
925 1 : n.referenced.Store(false)
926 1 : n.ptype = tableCacheNodeHot
927 1 : c.mu.sizeCold--
928 1 : c.mu.sizeHot++
929 1 : } else {
930 1 : c.clearNode(n)
931 1 : n.ptype = tableCacheNodeTest
932 1 : c.mu.sizeCold--
933 1 : c.mu.sizeTest++
934 1 : for c.size < c.mu.sizeTest && c.mu.handTest != nil {
935 0 : c.runHandTest()
936 0 : }
937 : }
938 : }
939 :
940 1 : c.mu.handCold = c.mu.handCold.next()
941 1 :
942 1 : for c.size-c.mu.coldTarget <= c.mu.sizeHot && c.mu.handHot != nil {
943 1 : c.runHandHot()
944 1 : }
945 : }
946 :
947 1 : func (c *tableCacheShard) runHandHot() {
948 1 : if c.mu.handHot == c.mu.handTest && c.mu.handTest != nil {
949 1 : c.runHandTest()
950 1 : if c.mu.handHot == nil {
951 0 : return
952 0 : }
953 : }
954 :
955 1 : n := c.mu.handHot
956 1 : if n.ptype == tableCacheNodeHot {
957 1 : if n.referenced.Load() {
958 1 : n.referenced.Store(false)
959 1 : } else {
960 1 : n.ptype = tableCacheNodeCold
961 1 : c.mu.sizeHot--
962 1 : c.mu.sizeCold++
963 1 : }
964 : }
965 :
966 1 : c.mu.handHot = c.mu.handHot.next()
967 : }
968 :
969 1 : func (c *tableCacheShard) runHandTest() {
970 1 : if c.mu.sizeCold > 0 && c.mu.handTest == c.mu.handCold && c.mu.handCold != nil {
971 1 : c.runHandCold()
972 1 : if c.mu.handTest == nil {
973 0 : return
974 0 : }
975 : }
976 :
977 1 : n := c.mu.handTest
978 1 : if n.ptype == tableCacheNodeTest {
979 1 : c.mu.coldTarget--
980 1 : if c.mu.coldTarget < 0 {
981 0 : c.mu.coldTarget = 0
982 0 : }
983 1 : c.unlinkNode(n)
984 1 : c.clearNode(n)
985 : }
986 :
987 1 : c.mu.handTest = c.mu.handTest.next()
988 : }
989 :
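// evict removes the node for fileNum (if any) from the shard, synchronously
// closes its reader, and then evicts the file's blocks from the block cache.
// allowLeak, used by removeDB, tolerates a non-zero reference count (e.g.
// leaked iterators) instead of treating it as fatal.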
990 1 : func (c *tableCacheShard) evict(fileNum base.DiskFileNum, dbOpts *tableCacheOpts, allowLeak bool) {
991 1 : c.mu.Lock()
992 1 : key := tableCacheKey{dbOpts.cacheID, fileNum}
993 1 : n := c.mu.nodes[key]
994 1 : var v *tableCacheValue
995 1 : if n != nil {
996 1 : // NB: This is equivalent to tableCacheShard.releaseNode(), but we perform
997 1 : // the tableCacheValue.release() call synchronously below to ensure the
998 1 : // sstable file descriptor is closed before returning. Note that
999 1 : // tableCacheShard.releasing needs to be incremented while holding
1000 1 : // tableCacheShard.mu in order to avoid a race with Close()
1001 1 : c.unlinkNode(n)
1002 1 : v = n.value
1003 1 : if v != nil {
1004 1 : if !allowLeak {
1005 1 : if t := v.refCount.Add(-1); t != 0 {
1006 0 : dbOpts.loggerAndTracer.Fatalf("sstable %s: refcount is not zero: %d\n%s", fileNum, t, debug.Stack())
1007 0 : }
1008 : }
1009 1 : c.releasing.Add(1)
1010 : }
1011 : }
1012 :
1013 1 : c.mu.Unlock()
1014 1 :
1015 1 : if v != nil {
1016 1 : v.release(c)
1017 1 : }
1018 :
1019 1 : dbOpts.opts.Cache.EvictFile(dbOpts.cacheID, fileNum)
1020 : }
1021 :
1022 : // removeDB evicts any nodes which have a reference to the DB
1023 : // associated with dbOpts.cacheID. Make sure that there will
1024 : // be no more accesses to the files associated with the DB.
1025 1 : func (c *tableCacheShard) removeDB(dbOpts *tableCacheOpts) {
1026 1 : var fileNums []base.DiskFileNum
1027 1 :
1028 1 : c.mu.RLock()
1029 1 : // Collect the fileNums which need to be cleaned.
1030 1 : var firstNode *tableCacheNode
1031 1 : node := c.mu.handHot
1032 1 : for node != firstNode {
1033 1 : if firstNode == nil {
1034 1 : firstNode = node
1035 1 : }
1036 :
1037 1 : if node.cacheID == dbOpts.cacheID {
1038 1 : fileNums = append(fileNums, node.fileNum)
1039 1 : }
1040 1 : node = node.next()
1041 : }
1042 1 : c.mu.RUnlock()
1043 1 :
1044 1 : // Evict all the nodes associated with the DB.
1045 1 : // This should synchronously close all the files
1046 1 : // associated with the DB.
1047 1 : for _, fileNum := range fileNums {
1048 1 : c.evict(fileNum, dbOpts, true)
1049 1 : }
1050 : }
1051 :
1052 1 : func (c *tableCacheShard) Close() error {
1053 1 : c.mu.Lock()
1054 1 : defer c.mu.Unlock()
1055 1 :
1056 1 : // Check for leaked iterators. Note that we'll still perform cleanup below in
1057 1 : // the case that there are leaked iterators.
1058 1 : var err error
1059 1 : if v := c.iterCount.Load(); v > 0 {
1060 0 : if !invariants.RaceEnabled {
1061 0 : err = errors.Errorf("leaked iterators: %d", errors.Safe(v))
1062 0 : } else {
1063 0 : var buf bytes.Buffer
1064 0 : for _, stack := range c.mu.iters {
1065 0 : fmt.Fprintf(&buf, "%s\n", stack)
1066 0 : }
1067 0 : err = errors.Errorf("leaked iterators: %d\n%s", errors.Safe(v), buf.String())
1068 : }
1069 : }
1070 :
1071 1 : for c.mu.handHot != nil {
1072 0 : n := c.mu.handHot
1073 0 : if n.value != nil {
1074 0 : if n.value.refCount.Add(-1) == 0 {
1075 0 : c.releasing.Add(1)
1076 0 : c.releasingCh <- n.value
1077 0 : }
1078 : }
1079 0 : c.unlinkNode(n)
1080 : }
1081 1 : c.mu.nodes = nil
1082 1 : c.mu.handHot = nil
1083 1 : c.mu.handCold = nil
1084 1 : c.mu.handTest = nil
1085 1 :
1086 1 : // Only shut down the releasing goroutine if there were no leaked
1087 1 : // iterators. If there were leaked iterators, we leave the goroutine running
1088 1 : // and the releasingCh open so that a subsequent iterator close can
1089 1 : // complete. This behavior is used by iterator leak tests. Leaking the
1090 1 : // goroutine for these tests is less bad than not closing the iterator, which
1091 1 : // triggers other warnings about block cache handles not being released.
1092 1 : if err != nil {
1093 0 : c.releasing.Wait()
1094 0 : return err
1095 0 : }
1096 :
1097 1 : close(c.releasingCh)
1098 1 : c.releasing.Wait()
1099 1 : c.releaseLoopExit.Wait()
1100 1 : return err
1101 : }
1102 :
1103 : type tableCacheValue struct {
1104 : closeHook func(i sstable.Iterator) error
1105 : reader *sstable.Reader
1106 : err error
1107 : loaded chan struct{}
1108 : // Reference count for the value. The reader is closed when the reference
1109 : // count drops to zero.
1110 : refCount atomic.Int32
1111 : }
1112 :
1113 : type loadInfo struct {
1114 : backingFileNum base.DiskFileNum
1115 : largestSeqNum uint64
1116 : smallestSeqNum uint64
1117 : }
1118 :
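// load opens the backing object and constructs the sstable.Reader for v,
// recording any failure in v.err and unlinking the corresponding node from
// the shard on error. It closes v.loaded when done so that waiters may
// proceed.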
1119 1 : func (v *tableCacheValue) load(loadInfo loadInfo, c *tableCacheShard, dbOpts *tableCacheOpts) {
1120 1 : // Try opening the file first.
1121 1 : var f objstorage.Readable
1122 1 : var err error
1123 1 : f, err = dbOpts.objProvider.OpenForReading(
1124 1 : context.TODO(), fileTypeTable, loadInfo.backingFileNum, objstorage.OpenOptions{MustExist: true},
1125 1 : )
1126 1 : if err == nil {
1127 1 : cacheOpts := private.SSTableCacheOpts(dbOpts.cacheID, loadInfo.backingFileNum).(sstable.ReaderOption)
1128 1 : v.reader, err = sstable.NewReader(f, dbOpts.opts, cacheOpts, dbOpts.filterMetrics)
1129 1 : }
1130 1 : if err != nil {
1131 0 : v.err = errors.Wrapf(
1132 0 : err, "pebble: backing file %s error", errors.Safe(loadInfo.backingFileNum.FileNum()))
1133 0 : }
1134 1 : if v.err == nil && loadInfo.smallestSeqNum == loadInfo.largestSeqNum {
1135 1 : v.reader.Properties.GlobalSeqNum = loadInfo.largestSeqNum
1136 1 : }
1137 1 : if v.err != nil {
1138 0 : c.mu.Lock()
1139 0 : defer c.mu.Unlock()
1140 0 : // Lookup the node in the cache again as it might have already been
1141 0 : // removed.
1142 0 : key := tableCacheKey{dbOpts.cacheID, loadInfo.backingFileNum}
1143 0 : n := c.mu.nodes[key]
1144 0 : if n != nil && n.value == v {
1145 0 : c.releaseNode(n)
1146 0 : }
1147 : }
1148 1 : close(v.loaded)
1149 : }
1150 :
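// release waits for any in-flight load to finish, closes the reader if one
// was opened, and signals the shard's releasing WaitGroup.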
1151 1 : func (v *tableCacheValue) release(c *tableCacheShard) {
1152 1 : <-v.loaded
1153 1 : // Nothing to be done about an error at this point. Close the reader if it is
1154 1 : // open.
1155 1 : if v.reader != nil {
1156 1 : _ = v.reader.Close()
1157 1 : }
1158 1 : c.releasing.Done()
1159 : }
1160 :
1161 : type tableCacheNodeType int8
1162 :
1163 : const (
1164 : tableCacheNodeTest tableCacheNodeType = iota
1165 : tableCacheNodeCold
1166 : tableCacheNodeHot
1167 : )
1168 :
1169 0 : func (p tableCacheNodeType) String() string {
1170 0 : switch p {
1171 0 : case tableCacheNodeTest:
1172 0 : return "test"
1173 0 : case tableCacheNodeCold:
1174 0 : return "cold"
1175 0 : case tableCacheNodeHot:
1176 0 : return "hot"
1177 : }
1178 0 : return "unknown"
1179 : }
1180 :
1181 : type tableCacheNode struct {
1182 : fileNum base.DiskFileNum
1183 : value *tableCacheValue
1184 :
1185 : links struct {
1186 : next *tableCacheNode
1187 : prev *tableCacheNode
1188 : }
1189 : ptype tableCacheNodeType
1190 : // referenced is atomically set to indicate that this entry has been accessed
1191 : // since the last time one of the clock hands swept it.
1192 : referenced atomic.Bool
1193 :
1194 : // Storing the cache id associated with the DB instance here
1195 : // avoids the need to thread the dbOpts struct through many functions.
1196 : cacheID uint64
1197 : }
1198 :
1199 1 : func (n *tableCacheNode) next() *tableCacheNode {
1200 1 : if n == nil {
1201 0 : return nil
1202 0 : }
1203 1 : return n.links.next
1204 : }
1205 :
1206 1 : func (n *tableCacheNode) prev() *tableCacheNode {
1207 1 : if n == nil {
1208 0 : return nil
1209 0 : }
1210 1 : return n.links.prev
1211 : }
1212 :
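// link inserts s into the circular list immediately before n.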
1213 1 : func (n *tableCacheNode) link(s *tableCacheNode) {
1214 1 : s.links.prev = n.links.prev
1215 1 : s.links.prev.links.next = s
1216 1 : s.links.next = n
1217 1 : s.links.next.links.prev = s
1218 1 : }
1219 :
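// unlink removes n from the circular list, leaving n self-linked, and returns
// the node that followed it.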
1220 1 : func (n *tableCacheNode) unlink() *tableCacheNode {
1221 1 : next := n.links.next
1222 1 : n.links.prev.links.next = n.links.next
1223 1 : n.links.next.links.prev = n.links.prev
1224 1 : n.links.prev = n
1225 1 : n.links.next = n
1226 1 : return next
1227 1 : }
|