LCOV - code coverage report
Current view: top level - pebble/sstable - test_fixtures.go (source / functions) Hit Total Coverage
Test: 2024-04-01 08:16Z 1c7bcd1c - tests + meta.lcov Lines: 93 103 90.3 %
Date: 2024-04-01 08:17:59 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package sstable
       6             : 
       7             : import (
       8             :         "bufio"
       9             :         "fmt"
      10             :         "io"
      11             :         "math"
      12             :         "os"
      13             :         "path/filepath"
      14             :         "sort"
      15             :         "strings"
      16             :         "sync"
      17             : 
      18             :         "github.com/cockroachdb/pebble/bloom"
      19             :         "github.com/cockroachdb/pebble/internal/base"
      20             :         "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
      21             :         "github.com/cockroachdb/pebble/vfs"
      22             : )
      23             : 
      24             : // testKVs is a key-value map holding test data.
      25             : type testKVs map[string]string
      26             : 
      27             : // SortedKeys returns the keys in the map, in sorted order.
      28           1 : func (m testKVs) SortedKeys() []string {
      29           1 :         res := make([]string, 0, len(m))
      30           1 :         for k := range m {
      31           1 :                 res = append(res, k)
      32           1 :         }
      33           1 :         sort.Strings(res)
      34           1 :         return res
      35             : }
      36             : 
      37             : // This variable should not be used directly, only via hamletWordCount().
      38             : var hamletWordCountState struct {
      39             :         once sync.Once
      40             :         data testKVs
      41             : }
      42             : 
      43             : // hamletWordCount returns the data in testdata/h.txt, as a map from word to
      44             : // count (as string).
      45           1 : func hamletWordCount() testKVs {
      46           1 :         hamletWordCountState.once.Do(func() {
      47           1 :                 wordCount := make(map[string]string)
      48           1 :                 f, err := os.Open(filepath.FromSlash("testdata/h.txt"))
      49           1 :                 if err != nil {
      50           0 :                         panic(err)
      51             :                 }
      52           1 :                 defer f.Close()
      53           1 :                 r := bufio.NewReader(f)
      54           1 : 
      55           1 :                 for {
      56           1 :                         s, err := r.ReadBytes('\n')
      57           1 :                         if err == io.EOF {
      58           1 :                                 break
      59             :                         }
      60           1 :                         if err != nil {
      61           0 :                                 panic(err)
      62             :                         }
      63           1 :                         k := strings.TrimSpace(string(s[8:]))
      64           1 :                         v := strings.TrimSpace(string(s[:8]))
      65           1 :                         wordCount[k] = v
      66             :                 }
      67           1 :                 if len(wordCount) != 1710 {
      68           0 :                         panic(fmt.Sprintf("h.txt entry count: got %d, want %d", len(wordCount), 1710))
      69             :                 }
      70           1 :                 for _, s := range hamletNonsenseWords {
      71           1 :                         if _, ok := wordCount[s]; ok {
      72           0 :                                 panic(fmt.Sprintf("nonsense word %q was in h.txt", s))
      73             :                         }
      74             :                 }
      75           1 :                 hamletWordCountState.data = wordCount
      76             :         })
      77           1 :         return hamletWordCountState.data
      78             : }
      79             : 
      80             : // hamletNonsenseWords are words that aren't in testdata/h.txt.
      81             : var hamletNonsenseWords = []string{
      82             :         // Edge cases.
      83             :         "",
      84             :         "\x00",
      85             :         "\xff",
      86             :         "`",
      87             :         "a\x00",
      88             :         "aaaaaa",
      89             :         "pol\x00nius",
      90             :         "youth\x00",
      91             :         "youti",
      92             :         "zzzzzz",
      93             :         // Capitalized versions of actual words in testdata/h.txt.
      94             :         "A",
      95             :         "Hamlet",
      96             :         "thEE",
      97             :         "YOUTH",
      98             :         // The following were generated by http://soybomb.com/tricks/words/
      99             :         "pectures",
     100             :         "exectly",
     101             :         "tricatrippian",
     102             :         "recens",
     103             :         "whiratroce",
     104             :         "troped",
     105             :         "balmous",
     106             :         "droppewry",
     107             :         "toilizing",
     108             :         "crocias",
     109             :         "eathrass",
     110             :         "cheakden",
     111             :         "speablett",
     112             :         "skirinies",
     113             :         "prefing",
     114             :         "bonufacision",
     115             : }
     116             : 
     117             : // buildHamletTestSST creates an sst file containing the hamlet word count data,
     118             : // using the given options.
     119             : func buildHamletTestSST(
     120             :         fs vfs.FS,
     121             :         filename string,
     122             :         compression Compression,
     123             :         fp FilterPolicy,
     124             :         ftype FilterType,
     125             :         comparer *Comparer,
     126             :         blockSize int,
     127             :         indexBlockSize int,
     128           1 : ) error {
     129           1 :         wordCount := hamletWordCount()
     130           1 :         keys := wordCount.SortedKeys()
     131           1 : 
     132           1 :         // Write the key/value pairs to a new table, in increasing key order.
     133           1 :         f0, err := fs.Create(filename)
     134           1 :         if err != nil {
     135           0 :                 return err
     136           0 :         }
     137             : 
     138           1 :         writerOpts := WriterOptions{
     139           1 :                 BlockSize:      blockSize,
     140           1 :                 Comparer:       comparer,
     141           1 :                 Compression:    compression,
     142           1 :                 FilterPolicy:   fp,
     143           1 :                 FilterType:     ftype,
     144           1 :                 IndexBlockSize: indexBlockSize,
     145           1 :                 MergerName:     "nullptr",
     146           1 :                 TableFormat:    fixtureFormat,
     147           1 :         }
     148           1 : 
     149           1 :         w := NewWriter(objstorageprovider.NewFileWritable(f0), writerOpts)
     150           1 :         // Use rangeDelV1Format for testing byte equality with RocksDB.
     151           1 :         w.rangeDelV1Format = true
     152           1 :         var rangeDelLength int
     153           1 :         var rangeDelCounter int
     154           1 :         var rangeDelStart InternalKey
     155           1 :         for i, k := range keys {
     156           1 :                 v := wordCount[k]
     157           1 :                 ikey := base.MakeInternalKey([]byte(k), 0, InternalKeyKindSet)
     158           1 :                 if err := w.Add(ikey, []byte(v)); err != nil {
     159           0 :                         return err
     160           0 :                 }
     161             :                 // This mirrors the logic in `make-table.cc`. It adds range deletions of
     162             :                 // increasing length for every 100 keys added.
     163           1 :                 if i%100 == 0 {
     164           1 :                         rangeDelStart = ikey.Clone()
     165           1 :                         rangeDelCounter = 0
     166           1 :                         rangeDelLength++
     167           1 :                 }
     168           1 :                 rangeDelCounter++
     169           1 : 
     170           1 :                 if rangeDelCounter == rangeDelLength {
     171           1 :                         if err := w.DeleteRange(rangeDelStart.UserKey, ikey.UserKey); err != nil {
     172           0 :                                 return err
     173           0 :                         }
     174             :                 }
     175             :         }
     176           1 :         return w.Close()
     177             : }
     178             : 
     179             : // TestFixtureInfo contains all metadata necessary to generate a test sstable.
     180             : type TestFixtureInfo struct {
     181             :         Filename           string
     182             :         Compression        Compression
     183             :         FullKeyFilter      bool
     184             :         PrefixFilter       bool
     185             :         IndexBlockSize     int
     186             :         UseFixtureComparer bool
     187             : }
     188             : 
     189             : // TestFixtures contains all metadata necessary to generate the test SSTs.
     190             : var TestFixtures = []TestFixtureInfo{
     191             :         {
     192             :                 Filename:           "h.sst",
     193             :                 Compression:        SnappyCompression,
     194             :                 FullKeyFilter:      false,
     195             :                 PrefixFilter:       false,
     196             :                 IndexBlockSize:     fixtureDefaultIndexBlockSize,
     197             :                 UseFixtureComparer: false,
     198             :         },
     199             :         {
     200             :                 Filename:           "h.no-compression.sst",
     201             :                 Compression:        NoCompression,
     202             :                 FullKeyFilter:      false,
     203             :                 PrefixFilter:       false,
     204             :                 IndexBlockSize:     fixtureDefaultIndexBlockSize,
     205             :                 UseFixtureComparer: false,
     206             :         },
     207             :         {
     208             :                 Filename:           "h.table-bloom.sst",
     209             :                 Compression:        SnappyCompression,
     210             :                 FullKeyFilter:      true,
     211             :                 PrefixFilter:       false,
     212             :                 IndexBlockSize:     fixtureDefaultIndexBlockSize,
     213             :                 UseFixtureComparer: false,
     214             :         },
     215             :         {
     216             :                 Filename:           "h.table-bloom.no-compression.sst",
     217             :                 Compression:        NoCompression,
     218             :                 FullKeyFilter:      true,
     219             :                 PrefixFilter:       false,
     220             :                 IndexBlockSize:     fixtureDefaultIndexBlockSize,
     221             :                 UseFixtureComparer: false,
     222             :         },
     223             :         {
     224             :                 Filename:           "h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst",
     225             :                 Compression:        NoCompression,
     226             :                 FullKeyFilter:      false,
     227             :                 PrefixFilter:       true,
     228             :                 IndexBlockSize:     fixtureDefaultIndexBlockSize,
     229             :                 UseFixtureComparer: true,
     230             :         },
     231             :         {
     232             :                 Filename:           "h.no-compression.two_level_index.sst",
     233             :                 Compression:        NoCompression,
     234             :                 FullKeyFilter:      false,
     235             :                 PrefixFilter:       false,
     236             :                 IndexBlockSize:     fixtureSmallIndexBlockSize,
     237             :                 UseFixtureComparer: false,
     238             :         },
     239             :         {
     240             :                 Filename:           "h.zstd-compression.sst",
     241             :                 Compression:        ZstdCompression,
     242             :                 FullKeyFilter:      false,
     243             :                 PrefixFilter:       false,
     244             :                 IndexBlockSize:     fixtureDefaultIndexBlockSize,
     245             :                 UseFixtureComparer: false,
     246             :         },
     247             : }
     248             : 
     249             : // Build creates an sst file for the given fixture.
     250           1 : func (tf TestFixtureInfo) Build(fs vfs.FS, filename string) error {
     251           1 :         var fp base.FilterPolicy
     252           1 :         if tf.FullKeyFilter || tf.PrefixFilter {
     253           1 :                 fp = bloom.FilterPolicy(10)
     254           1 :         }
     255           1 :         var comparer *Comparer
     256           1 :         if tf.UseFixtureComparer {
     257           1 :                 comparer = fixtureComparer
     258           1 :         }
     259             : 
     260           1 :         return buildHamletTestSST(
     261           1 :                 fs, filename, tf.Compression, fp, base.TableFilter,
     262           1 :                 comparer,
     263           1 :                 fixtureBlockSize,
     264           1 :                 tf.IndexBlockSize,
     265           1 :         )
     266             : }
     267             : 
     268             : const fixtureDefaultIndexBlockSize = math.MaxInt32
     269             : const fixtureSmallIndexBlockSize = 128
     270             : const fixtureBlockSize = 2048
     271             : const fixtureFormat = TableFormatPebblev1
     272             : 
     273           2 : var fixtureComparer = func() *Comparer {
     274           2 :         c := *base.DefaultComparer
     275           2 :         // NB: this is named as such only to match the built-in RocksDB comparer.
     276           2 :         c.Name = "leveldb.BytewiseComparator"
     277           2 :         c.Split = func(a []byte) int {
     278           1 :                 // TODO(tbg): It's difficult to provide a more meaningful prefix extractor
     279           1 :                 // on the given dataset since it's not MVCC, and so it's impossible to come
     280           1 :                 // up with a sensible one. We need to add a better dataset and use that
     281           1 :                 // instead to get confidence that prefix extractors are working as intended.
     282           1 :                 return len(a)
     283           1 :         }
     284           2 :         return &c
     285             : }()

Generated by: LCOV version 1.14