LCOV - code coverage report
Current view: top level - pebble/sstable - test_fixtures.go (source / functions) Hit Total Coverage
Test: 2023-12-30 08:15Z 1cce3d01 - tests only.lcov Lines: 112 122 91.8 %
Date: 2023-12-30 08:15:56 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package sstable
       6             : 
       7             : import (
       8             :         "bufio"
       9             :         "fmt"
      10             :         "io"
      11             :         "math"
      12             :         "os"
      13             :         "path/filepath"
      14             :         "sort"
      15             :         "strings"
      16             :         "sync"
      17             : 
      18             :         "github.com/cockroachdb/pebble/bloom"
      19             :         "github.com/cockroachdb/pebble/internal/base"
      20             :         "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
      21             :         "github.com/cockroachdb/pebble/vfs"
      22             : )
      23             : 
      24             : // testKVs is a key-value map holding test data.
      25             : type testKVs map[string]string
      26             : 
      27             : // SortedKeys returns the keys in the map, in sorted order.
      28           1 : func (m testKVs) SortedKeys() []string {
      29           1 :         res := make([]string, 0, len(m))
      30           1 :         for k := range m {
      31           1 :                 res = append(res, k)
      32           1 :         }
      33           1 :         sort.Strings(res)
      34           1 :         return res
      35             : }
      36             : 
      37             : // These variable should not be used directly, only via hamletWordCount().
      38             : var hamletWordCountState struct {
      39             :         once sync.Once
      40             :         data testKVs
      41             : }
      42             : 
      43             : // hamletWordCount returns the data in testdata.h/txt, as a map from word to
      44             : // count (as string).
      45           1 : func hamletWordCount() testKVs {
      46           1 :         hamletWordCountState.once.Do(func() {
      47           1 :                 wordCount := make(map[string]string)
      48           1 :                 f, err := os.Open(filepath.FromSlash("testdata/h.txt"))
      49           1 :                 if err != nil {
      50           0 :                         panic(err)
      51             :                 }
      52           1 :                 defer f.Close()
      53           1 :                 r := bufio.NewReader(f)
      54           1 : 
      55           1 :                 for {
      56           1 :                         s, err := r.ReadBytes('\n')
      57           1 :                         if err == io.EOF {
      58           1 :                                 break
      59             :                         }
      60           1 :                         if err != nil {
      61           0 :                                 panic(err)
      62             :                         }
      63           1 :                         k := strings.TrimSpace(string(s[8:]))
      64           1 :                         v := strings.TrimSpace(string(s[:8]))
      65           1 :                         wordCount[k] = v
      66             :                 }
      67           1 :                 if len(wordCount) != 1710 {
      68           0 :                         panic(fmt.Sprintf("h.txt entry count: got %d, want %d", len(wordCount), 1710))
      69             :                 }
      70           1 :                 for _, s := range hamletNonsenseWords {
      71           1 :                         if _, ok := wordCount[s]; ok {
      72           0 :                                 panic(fmt.Sprintf("nonsense word %q was in h.txt", s))
      73             :                         }
      74             :                 }
      75           1 :                 hamletWordCountState.data = wordCount
      76             :         })
      77           1 :         return hamletWordCountState.data
      78             : }
      79             : 
      80             : // hamletNonsenseWords are words that aren't in testdata/h.txt.
      81             : var hamletNonsenseWords = []string{
      82             :         // Edge cases.
      83             :         "",
      84             :         "\x00",
      85             :         "\xff",
      86             :         "`",
      87             :         "a\x00",
      88             :         "aaaaaa",
      89             :         "pol\x00nius",
      90             :         "youth\x00",
      91             :         "youti",
      92             :         "zzzzzz",
      93             :         // Capitalized versions of actual words in testdata/h.txt.
      94             :         "A",
      95             :         "Hamlet",
      96             :         "thEE",
      97             :         "YOUTH",
      98             :         // The following were generated by http://soybomb.com/tricks/words/
      99             :         "pectures",
     100             :         "exectly",
     101             :         "tricatrippian",
     102             :         "recens",
     103             :         "whiratroce",
     104             :         "troped",
     105             :         "balmous",
     106             :         "droppewry",
     107             :         "toilizing",
     108             :         "crocias",
     109             :         "eathrass",
     110             :         "cheakden",
     111             :         "speablett",
     112             :         "skirinies",
     113             :         "prefing",
     114             :         "bonufacision",
     115             : }
     116             : 
     117             : // buildHamletTestSST creates an sst file containing the hamlet word count data,
     118             : // using the given options.
     119             : func buildHamletTestSST(
     120             :         fs vfs.FS,
     121             :         filename string,
     122             :         compression Compression,
     123             :         fp FilterPolicy,
     124             :         ftype FilterType,
     125             :         comparer *Comparer,
     126             :         propCollector func() TablePropertyCollector,
     127             :         blockSize int,
     128             :         indexBlockSize int,
     129           1 : ) error {
     130           1 :         wordCount := hamletWordCount()
     131           1 :         keys := wordCount.SortedKeys()
     132           1 : 
     133           1 :         // Write the key/value pairs to a new table, in increasing key order.
     134           1 :         f0, err := fs.Create(filename)
     135           1 :         if err != nil {
     136           0 :                 return err
     137           0 :         }
     138             : 
     139           1 :         writerOpts := WriterOptions{
     140           1 :                 BlockSize:      blockSize,
     141           1 :                 Comparer:       comparer,
     142           1 :                 Compression:    compression,
     143           1 :                 FilterPolicy:   fp,
     144           1 :                 FilterType:     ftype,
     145           1 :                 IndexBlockSize: indexBlockSize,
     146           1 :                 MergerName:     "nullptr",
     147           1 :                 TableFormat:    fixtureFormat,
     148           1 :         }
     149           1 :         if propCollector != nil {
     150           1 :                 writerOpts.TablePropertyCollectors = append(writerOpts.TablePropertyCollectors, propCollector)
     151           1 :         }
     152             : 
     153           1 :         w := NewWriter(objstorageprovider.NewFileWritable(f0), writerOpts)
     154           1 :         // Use rangeDelV1Format for testing byte equality with RocksDB.
     155           1 :         w.rangeDelV1Format = true
     156           1 :         var rangeDelLength int
     157           1 :         var rangeDelCounter int
     158           1 :         var rangeDelStart InternalKey
     159           1 :         for i, k := range keys {
     160           1 :                 v := wordCount[k]
     161           1 :                 ikey := base.MakeInternalKey([]byte(k), 0, InternalKeyKindSet)
     162           1 :                 if err := w.Add(ikey, []byte(v)); err != nil {
     163           0 :                         return err
     164           0 :                 }
     165             :                 // This mirrors the logic in `make-table.cc`. It adds range deletions of
     166             :                 // increasing length for every 100 keys added.
     167           1 :                 if i%100 == 0 {
     168           1 :                         rangeDelStart = ikey.Clone()
     169           1 :                         rangeDelCounter = 0
     170           1 :                         rangeDelLength++
     171           1 :                 }
     172           1 :                 rangeDelCounter++
     173           1 : 
     174           1 :                 if rangeDelCounter == rangeDelLength {
     175           1 :                         if err := w.DeleteRange(rangeDelStart.UserKey, ikey.UserKey); err != nil {
     176           0 :                                 return err
     177           0 :                         }
     178             :                 }
     179             :         }
     180           1 :         return w.Close()
     181             : }
     182             : 
     183             : // TestFixtureInfo contains all metadata necessary to generate a test sstable.
     184             : type TestFixtureInfo struct {
     185             :         Filename                  string
     186             :         Compression               Compression
     187             :         FullKeyFilter             bool
     188             :         PrefixFilter              bool
     189             :         IndexBlockSize            int
     190             :         UseFixtureComparer        bool
     191             :         KeyCountPropertyCollector bool
     192             : }
     193             : 
     194             : // TestFixtures contains all metadata necessary to generate the test SSTs.
     195             : var TestFixtures = []TestFixtureInfo{
     196             :         {
     197             :                 Filename:                  "h.sst",
     198             :                 Compression:               SnappyCompression,
     199             :                 FullKeyFilter:             false,
     200             :                 PrefixFilter:              false,
     201             :                 IndexBlockSize:            fixtureDefaultIndexBlockSize,
     202             :                 UseFixtureComparer:        false,
     203             :                 KeyCountPropertyCollector: true,
     204             :         },
     205             :         {
     206             :                 Filename:                  "h.no-compression.sst",
     207             :                 Compression:               NoCompression,
     208             :                 FullKeyFilter:             false,
     209             :                 PrefixFilter:              false,
     210             :                 IndexBlockSize:            fixtureDefaultIndexBlockSize,
     211             :                 UseFixtureComparer:        false,
     212             :                 KeyCountPropertyCollector: true,
     213             :         },
     214             :         {
     215             :                 Filename:                  "h.table-bloom.sst",
     216             :                 Compression:               SnappyCompression,
     217             :                 FullKeyFilter:             true,
     218             :                 PrefixFilter:              false,
     219             :                 IndexBlockSize:            fixtureDefaultIndexBlockSize,
     220             :                 UseFixtureComparer:        false,
     221             :                 KeyCountPropertyCollector: false,
     222             :         },
     223             :         {
     224             :                 Filename:                  "h.table-bloom.no-compression.sst",
     225             :                 Compression:               NoCompression,
     226             :                 FullKeyFilter:             true,
     227             :                 PrefixFilter:              false,
     228             :                 IndexBlockSize:            fixtureDefaultIndexBlockSize,
     229             :                 UseFixtureComparer:        false,
     230             :                 KeyCountPropertyCollector: false,
     231             :         },
     232             :         {
     233             :                 Filename:                  "h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst",
     234             :                 Compression:               NoCompression,
     235             :                 FullKeyFilter:             false,
     236             :                 PrefixFilter:              true,
     237             :                 IndexBlockSize:            fixtureDefaultIndexBlockSize,
     238             :                 UseFixtureComparer:        true,
     239             :                 KeyCountPropertyCollector: false,
     240             :         },
     241             :         {
     242             :                 Filename:                  "h.no-compression.two_level_index.sst",
     243             :                 Compression:               NoCompression,
     244             :                 FullKeyFilter:             false,
     245             :                 PrefixFilter:              false,
     246             :                 IndexBlockSize:            fixtureSmallIndexBlockSize,
     247             :                 UseFixtureComparer:        false,
     248             :                 KeyCountPropertyCollector: true,
     249             :         },
     250             :         {
     251             :                 Filename:                  "h.zstd-compression.sst",
     252             :                 Compression:               ZstdCompression,
     253             :                 FullKeyFilter:             false,
     254             :                 PrefixFilter:              false,
     255             :                 IndexBlockSize:            fixtureDefaultIndexBlockSize,
     256             :                 UseFixtureComparer:        false,
     257             :                 KeyCountPropertyCollector: true,
     258             :         },
     259             : }
     260             : 
     261             : // Build creates an sst file for the given fixture.
     262           1 : func (tf TestFixtureInfo) Build(fs vfs.FS, filename string) error {
     263           1 :         var fp base.FilterPolicy
     264           1 :         if tf.FullKeyFilter || tf.PrefixFilter {
     265           1 :                 fp = bloom.FilterPolicy(10)
     266           1 :         }
     267           1 :         var comparer *Comparer
     268           1 :         if tf.UseFixtureComparer {
     269           1 :                 comparer = fixtureComparer
     270           1 :         }
     271           1 :         var propCollector func() TablePropertyCollector
     272           1 :         if tf.KeyCountPropertyCollector {
     273           1 :                 propCollector = func() TablePropertyCollector {
     274           1 :                         return &keyCountPropertyCollector{}
     275           1 :                 }
     276             :         }
     277             : 
     278           1 :         return buildHamletTestSST(
     279           1 :                 fs, filename, tf.Compression, fp, base.TableFilter,
     280           1 :                 comparer,
     281           1 :                 propCollector,
     282           1 :                 fixtureBlockSize,
     283           1 :                 tf.IndexBlockSize,
     284           1 :         )
     285             : }
     286             : 
     287             : const fixtureDefaultIndexBlockSize = math.MaxInt32
     288             : const fixtureSmallIndexBlockSize = 128
     289             : const fixtureBlockSize = 2048
     290             : const fixtureFormat = TableFormatPebblev1
     291             : 
     292             : type keyCountPropertyCollector struct {
     293             :         count int
     294             : }
     295             : 
     296           1 : func (c *keyCountPropertyCollector) Add(key InternalKey, value []byte) error {
     297           1 :         c.count++
     298           1 :         return nil
     299           1 : }
     300             : 
     301           1 : func (c *keyCountPropertyCollector) Finish(userProps map[string]string) error {
     302           1 :         userProps["test.key-count"] = fmt.Sprint(c.count)
     303           1 :         return nil
     304           1 : }
     305             : 
     306           1 : func (c *keyCountPropertyCollector) Name() string {
     307           1 :         return "KeyCountPropertyCollector"
     308           1 : }
     309             : 
     310           1 : var fixtureComparer = func() *Comparer {
     311           1 :         c := *base.DefaultComparer
     312           1 :         // NB: this is named as such only to match the built-in RocksDB comparer.
     313           1 :         c.Name = "leveldb.BytewiseComparator"
     314           1 :         c.Split = func(a []byte) int {
     315           1 :                 // TODO(tbg): It's difficult to provide a more meaningful prefix extractor
     316           1 :                 // on the given dataset since it's not MVCC, and so it's impossible to come
     317           1 :                 // up with a sensible one. We need to add a better dataset and use that
     318           1 :                 // instead to get confidence that prefix extractors are working as intended.
     319           1 :                 return len(a)
     320           1 :         }
     321           1 :         return &c
     322             : }()

Generated by: LCOV version 1.14