LCOV - code coverage report
Current view: top level - pebble/sstable - test_fixtures.go (source / functions) Hit Total Coverage
Test: 2024-09-25 08:16Z b34a3937 - tests + meta.lcov Lines: 91 101 90.1 %
Date: 2024-09-25 08:17:40 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package sstable
       6             : 
       7             : import (
       8             :         "bufio"
       9             :         "fmt"
      10             :         "io"
      11             :         "math"
      12             :         "os"
      13             :         "path/filepath"
      14             :         "sort"
      15             :         "strings"
      16             :         "sync"
      17             : 
      18             :         "github.com/cockroachdb/pebble/bloom"
      19             :         "github.com/cockroachdb/pebble/internal/base"
      20             :         "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
      21             :         "github.com/cockroachdb/pebble/sstable/block"
      22             :         "github.com/cockroachdb/pebble/vfs"
      23             : )
      24             : 
      25             : // testKVs is a key-value map holding test data.
      26             : type testKVs map[string]string
      27             : 
      28             : // SortedKeys returns the keys in the map, in sorted order.
      29           1 : func (m testKVs) SortedKeys() []string {
      30           1 :         res := make([]string, 0, len(m))
      31           1 :         for k := range m {
      32           1 :                 res = append(res, k)
      33           1 :         }
      34           1 :         sort.Strings(res)
      35           1 :         return res
      36             : }
      37             : 
      38             : // These variable should not be used directly, only via hamletWordCount().
      39             : var hamletWordCountState struct {
      40             :         once sync.Once
      41             :         data testKVs
      42             : }
      43             : 
      44             : // hamletWordCount returns the data in testdata.h/txt, as a map from word to
      45             : // count (as string).
      46           1 : func hamletWordCount() testKVs {
      47           1 :         hamletWordCountState.once.Do(func() {
      48           1 :                 wordCount := make(map[string]string)
      49           1 :                 f, err := os.Open(filepath.FromSlash("testdata/h.txt"))
      50           1 :                 if err != nil {
      51           0 :                         panic(err)
      52             :                 }
      53           1 :                 defer f.Close()
      54           1 :                 r := bufio.NewReader(f)
      55           1 : 
      56           1 :                 for {
      57           1 :                         s, err := r.ReadBytes('\n')
      58           1 :                         if err == io.EOF {
      59           1 :                                 break
      60             :                         }
      61           1 :                         if err != nil {
      62           0 :                                 panic(err)
      63             :                         }
      64           1 :                         k := strings.TrimSpace(string(s[8:]))
      65           1 :                         v := strings.TrimSpace(string(s[:8]))
      66           1 :                         wordCount[k] = v
      67             :                 }
      68           1 :                 if len(wordCount) != 1710 {
      69           0 :                         panic(fmt.Sprintf("h.txt entry count: got %d, want %d", len(wordCount), 1710))
      70             :                 }
      71           1 :                 for _, s := range hamletNonsenseWords {
      72           1 :                         if _, ok := wordCount[s]; ok {
      73           0 :                                 panic(fmt.Sprintf("nonsense word %q was in h.txt", s))
      74             :                         }
      75             :                 }
      76           1 :                 hamletWordCountState.data = wordCount
      77             :         })
      78           1 :         return hamletWordCountState.data
      79             : }
      80             : 
      81             : // hamletNonsenseWords are words that aren't in testdata/h.txt.
      82             : var hamletNonsenseWords = []string{
      83             :         // Edge cases.
      84             :         "",
      85             :         "\x00",
      86             :         "\xff",
      87             :         "`",
      88             :         "a\x00",
      89             :         "aaaaaa",
      90             :         "pol\x00nius",
      91             :         "youth\x00",
      92             :         "youti",
      93             :         "zzzzzz",
      94             :         // Capitalized versions of actual words in testdata/h.txt.
      95             :         "A",
      96             :         "Hamlet",
      97             :         "thEE",
      98             :         "YOUTH",
      99             :         // The following were generated by http://soybomb.com/tricks/words/
     100             :         "pectures",
     101             :         "exectly",
     102             :         "tricatrippian",
     103             :         "recens",
     104             :         "whiratroce",
     105             :         "troped",
     106             :         "balmous",
     107             :         "droppewry",
     108             :         "toilizing",
     109             :         "crocias",
     110             :         "eathrass",
     111             :         "cheakden",
     112             :         "speablett",
     113             :         "skirinies",
     114             :         "prefing",
     115             :         "bonufacision",
     116             : }
     117             : 
     118             : // buildHamletTestSST creates an sst file containing the hamlet word count data,
     119             : // using the given options.
     120             : func buildHamletTestSST(
     121             :         fs vfs.FS,
     122             :         filename string,
     123             :         compression block.Compression,
     124             :         fp FilterPolicy,
     125             :         ftype FilterType,
     126             :         comparer *Comparer,
     127             :         blockSize int,
     128             :         indexBlockSize int,
     129           1 : ) error {
     130           1 :         wordCount := hamletWordCount()
     131           1 :         keys := wordCount.SortedKeys()
     132           1 : 
     133           1 :         // Write the key/value pairs to a new table, in increasing key order.
     134           1 :         f0, err := fs.Create(filename, vfs.WriteCategoryUnspecified)
     135           1 :         if err != nil {
     136           0 :                 return err
     137           0 :         }
     138             : 
     139           1 :         writerOpts := WriterOptions{
     140           1 :                 BlockSize:      blockSize,
     141           1 :                 Comparer:       comparer,
     142           1 :                 Compression:    compression,
     143           1 :                 FilterPolicy:   fp,
     144           1 :                 FilterType:     ftype,
     145           1 :                 IndexBlockSize: indexBlockSize,
     146           1 :                 MergerName:     "nullptr",
     147           1 :                 TableFormat:    fixtureFormat,
     148           1 :         }
     149           1 : 
     150           1 :         w := NewWriter(objstorageprovider.NewFileWritable(f0), writerOpts)
     151           1 :         // NB: We don't have byte equality with RocksDB any longer.
     152           1 :         var rangeDelLength int
     153           1 :         var rangeDelCounter int
     154           1 :         var rangeDelStart []byte
     155           1 :         for i, k := range keys {
     156           1 :                 v := wordCount[k]
     157           1 :                 if err := w.Set([]byte(k), []byte(v)); err != nil {
     158           0 :                         return err
     159           0 :                 }
     160             :                 // This mirrors the logic in `make-table.cc`. It adds range deletions of
     161             :                 // increasing length for every 100 keys added.
     162           1 :                 if i%100 == 0 {
     163           1 :                         rangeDelStart = []byte(k)
     164           1 :                         rangeDelCounter = 0
     165           1 :                         rangeDelLength++
     166           1 :                 }
     167           1 :                 rangeDelCounter++
     168           1 : 
     169           1 :                 if rangeDelCounter == rangeDelLength {
     170           1 :                         if err := w.DeleteRange(rangeDelStart, []byte(k)); err != nil {
     171           0 :                                 return err
     172           0 :                         }
     173             :                 }
     174             :         }
     175           1 :         return w.Close()
     176             : }
     177             : 
     178             : // TestFixtureInfo contains all metadata necessary to generate a test sstable.
     179             : type TestFixtureInfo struct {
     180             :         Filename           string
     181             :         Compression        block.Compression
     182             :         FullKeyFilter      bool
     183             :         PrefixFilter       bool
     184             :         IndexBlockSize     int
     185             :         UseFixtureComparer bool
     186             : }
     187             : 
     188             : // TestFixtures contains all metadata necessary to generate the test SSTs.
     189             : var TestFixtures = []TestFixtureInfo{
     190             :         {
     191             :                 Filename:           "h.sst",
     192             :                 Compression:        block.SnappyCompression,
     193             :                 FullKeyFilter:      false,
     194             :                 PrefixFilter:       false,
     195             :                 IndexBlockSize:     fixtureDefaultIndexBlockSize,
     196             :                 UseFixtureComparer: false,
     197             :         },
     198             :         {
     199             :                 Filename:           "h.no-compression.sst",
     200             :                 Compression:        block.NoCompression,
     201             :                 FullKeyFilter:      false,
     202             :                 PrefixFilter:       false,
     203             :                 IndexBlockSize:     fixtureDefaultIndexBlockSize,
     204             :                 UseFixtureComparer: false,
     205             :         },
     206             :         {
     207             :                 Filename:           "h.table-bloom.sst",
     208             :                 Compression:        block.SnappyCompression,
     209             :                 FullKeyFilter:      true,
     210             :                 PrefixFilter:       false,
     211             :                 IndexBlockSize:     fixtureDefaultIndexBlockSize,
     212             :                 UseFixtureComparer: false,
     213             :         },
     214             :         {
     215             :                 Filename:           "h.table-bloom.no-compression.sst",
     216             :                 Compression:        block.NoCompression,
     217             :                 FullKeyFilter:      true,
     218             :                 PrefixFilter:       false,
     219             :                 IndexBlockSize:     fixtureDefaultIndexBlockSize,
     220             :                 UseFixtureComparer: false,
     221             :         },
     222             :         {
     223             :                 Filename:           "h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst",
     224             :                 Compression:        block.NoCompression,
     225             :                 FullKeyFilter:      false,
     226             :                 PrefixFilter:       true,
     227             :                 IndexBlockSize:     fixtureDefaultIndexBlockSize,
     228             :                 UseFixtureComparer: true,
     229             :         },
     230             :         {
     231             :                 Filename:           "h.no-compression.two_level_index.sst",
     232             :                 Compression:        block.NoCompression,
     233             :                 FullKeyFilter:      false,
     234             :                 PrefixFilter:       false,
     235             :                 IndexBlockSize:     fixtureSmallIndexBlockSize,
     236             :                 UseFixtureComparer: false,
     237             :         },
     238             :         {
     239             :                 Filename:           "h.zstd-compression.sst",
     240             :                 Compression:        block.ZstdCompression,
     241             :                 FullKeyFilter:      false,
     242             :                 PrefixFilter:       false,
     243             :                 IndexBlockSize:     fixtureDefaultIndexBlockSize,
     244             :                 UseFixtureComparer: false,
     245             :         },
     246             : }
     247             : 
     248             : // Build creates an sst file for the given fixture.
     249           1 : func (tf TestFixtureInfo) Build(fs vfs.FS, filename string) error {
     250           1 :         var fp base.FilterPolicy
     251           1 :         if tf.FullKeyFilter || tf.PrefixFilter {
     252           1 :                 fp = bloom.FilterPolicy(10)
     253           1 :         }
     254           1 :         var comparer *Comparer
     255           1 :         if tf.UseFixtureComparer {
     256           1 :                 comparer = fixtureComparer
     257           1 :         }
     258             : 
     259           1 :         return buildHamletTestSST(
     260           1 :                 fs, filename, tf.Compression, fp, base.TableFilter,
     261           1 :                 comparer,
     262           1 :                 fixtureBlockSize,
     263           1 :                 tf.IndexBlockSize,
     264           1 :         )
     265             : }
     266             : 
     267             : const fixtureDefaultIndexBlockSize = math.MaxInt32
     268             : const fixtureSmallIndexBlockSize = 128
     269             : const fixtureBlockSize = 2048
     270             : const fixtureFormat = TableFormatPebblev1
     271             : 
     272           2 : var fixtureComparer = func() *Comparer {
     273           2 :         c := *base.DefaultComparer
     274           2 :         // NB: this is named as such only to match the built-in RocksDB comparer.
     275           2 :         c.Name = "leveldb.BytewiseComparator"
     276           2 :         c.Split = func(a []byte) int {
     277           1 :                 // TODO(tbg): It's difficult to provide a more meaningful prefix extractor
     278           1 :                 // on the given dataset since it's not MVCC, and so it's impossible to come
     279           1 :                 // up with a sensible one. We need to add a better dataset and use that
     280           1 :                 // instead to get confidence that prefix extractors are working as intended.
     281           1 :                 return len(a)
     282           1 :         }
     283           2 :         return &c
     284             : }()

Generated by: LCOV version 1.14