Line data Source code
1 : // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package tool
6 :
7 : import (
8 : "encoding/json"
9 : "fmt"
10 : "io"
11 : "log"
12 : "math"
13 : "slices"
14 :
15 : "github.com/cockroachdb/errors"
16 : "github.com/cockroachdb/pebble"
17 : "github.com/cockroachdb/pebble/internal/base"
18 : "github.com/cockroachdb/pebble/internal/manifest"
19 : "github.com/cockroachdb/pebble/record"
20 : "github.com/cockroachdb/pebble/sstable"
21 : "github.com/spf13/cobra"
22 : )
23 :
24 : //go:generate ./make_lsm_data.sh
25 :
26 : type lsmFileMetadata struct {
27 : Size uint64
28 : Smallest int // ID of smallest key
29 : Largest int // ID of largest key
30 : SmallestSeqNum base.SeqNum
31 : LargestSeqNum base.SeqNum
32 : Virtual bool
33 : }
34 :
35 : type lsmVersionEdit struct {
36 : // Reason for the edit: flushed, ingested, compacted, added.
37 : Reason string
38 : // Map from level to files added to the level.
39 : Added map[int][]base.FileNum `json:",omitempty"`
40 : // Map from level to files deleted from the level.
41 : Deleted map[int][]base.FileNum `json:",omitempty"`
42 : // L0 sublevels for any files with changed sublevels so far.
43 : Sublevels map[base.FileNum]int `json:",omitempty"`
44 : }
45 :
46 : type lsmKey struct {
47 : Pretty string
48 : SeqNum base.SeqNum
49 : Kind int
50 : }
51 :
52 : type lsmState struct {
53 : Manifest string
54 : Edits []lsmVersionEdit `json:",omitempty"`
55 : Files map[base.FileNum]lsmFileMetadata `json:",omitempty"`
56 : Keys []lsmKey `json:",omitempty"`
57 : StartEdit int64
58 : }
59 :
60 : type lsmT struct {
61 : Root *cobra.Command
62 :
63 : // Configuration.
64 : opts *pebble.Options
65 : comparers sstable.Comparers
66 :
67 : fmtKey keyFormatter
68 : embed bool
69 : pretty bool
70 : startEdit int64
71 : endEdit int64
72 : editCount int64
73 :
74 : cmp *base.Comparer
75 : state lsmState
76 : keyMap map[lsmKey]int
77 : }
78 :
79 1 : func newLSM(opts *pebble.Options, comparers sstable.Comparers) *lsmT {
80 1 : l := &lsmT{
81 1 : opts: opts,
82 1 : comparers: comparers,
83 1 : }
84 1 : l.fmtKey.mustSet("quoted")
85 1 :
86 1 : l.Root = &cobra.Command{
87 1 : Use: "lsm <manifest>",
88 1 : Short: "LSM visualization tool",
89 1 : Long: `
90 1 : Visualize the evolution of an LSM from the version edits in a MANIFEST.
91 1 :
92 1 : Given an input MANIFEST, output an HTML file containing a visualization showing
93 1 : the evolution of the LSM. Each version edit in the MANIFEST becomes a single
94 1 : step in the visualization. The 7 levels of the LSM are depicted with each
95 1 : sstable represented as a 1-pixel wide rectangle. The height of the rectangle is
96 1 : proportional to the size (in bytes) of the sstable. The sstables are displayed
97 1 : in the same order as they occur in the LSM. Note that the sstables from
98 1 : different levels are NOT aligned according to their start and end keys (doing so
99 1 : is also interesting, but it works against using the area of the rectangle to
100 1 : indicate size).
101 1 : `,
102 1 : Args: cobra.ExactArgs(1),
103 1 : RunE: l.runLSM,
104 1 : }
105 1 :
106 1 : l.Root.Flags().Var(&l.fmtKey, "key", "key formatter")
107 1 : l.Root.Flags().BoolVar(&l.embed, "embed", true, "embed javascript in HTML (disable for development)")
108 1 : l.Root.Flags().BoolVar(&l.pretty, "pretty", false, "pretty JSON output")
109 1 : l.Root.Flags().Int64Var(&l.startEdit, "start-edit", 0, "starting edit # to include in visualization")
110 1 : l.Root.Flags().Int64Var(&l.endEdit, "end-edit", math.MaxInt64, "ending edit # to include in visualization")
111 1 : l.Root.Flags().Int64Var(&l.editCount, "edit-count", math.MaxInt64, "count of edits to include in visualization")
112 1 : return l
113 1 : }
114 :
115 0 : func (l *lsmT) isFlagSet(name string) bool {
116 0 : return l.Root.Flags().Changed(name)
117 0 : }
118 :
119 0 : func (l *lsmT) validateFlags() error {
120 0 : if l.isFlagSet("edit-count") {
121 0 : if l.isFlagSet("start-edit") && l.isFlagSet("end-edit") {
122 0 : return errors.Errorf("edit-count cannot be provided with both start-edit and end-edit")
123 0 : } else if l.isFlagSet("end-edit") {
124 0 : return errors.Errorf("cannot use edit-count with end-edit, use start-edit and end-edit instead")
125 0 : }
126 : }
127 :
128 0 : if l.startEdit > l.endEdit {
129 0 : return errors.Errorf("start-edit cannot be after end-edit")
130 0 : }
131 :
132 0 : return nil
133 : }
134 :
135 0 : func (l *lsmT) runLSM(cmd *cobra.Command, args []string) error {
136 0 : err := l.validateFlags()
137 0 : if err != nil {
138 0 : return err
139 0 : }
140 :
141 0 : edits := l.readManifest(args[0])
142 0 : if edits == nil {
143 0 : return nil
144 0 : }
145 :
146 0 : if l.startEdit > 0 {
147 0 : edits, err = l.coalesceEdits(edits)
148 0 : if err != nil {
149 0 : return err
150 0 : }
151 : }
152 0 : if l.endEdit < int64(len(edits)) {
153 0 : edits = edits[:l.endEdit-l.startEdit+1]
154 0 : }
155 0 : if l.editCount < int64(len(edits)) {
156 0 : edits = edits[:l.editCount]
157 0 : }
158 :
159 0 : l.buildKeys(edits)
160 0 : err = l.buildEdits(edits)
161 0 : if err != nil {
162 0 : return err
163 0 : }
164 :
165 0 : w := l.Root.OutOrStdout()
166 0 :
167 0 : fmt.Fprintf(w, `<!DOCTYPE html>
168 0 : <html>
169 0 : <head>
170 0 : <meta charset="utf-8">
171 0 : <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
172 0 : `)
173 0 : if l.embed {
174 0 : fmt.Fprintf(w, "<style>%s</style>\n", lsmDataCSS)
175 0 : } else {
176 0 : fmt.Fprintf(w, "<link rel=\"stylesheet\" href=\"data/lsm.css\">\n")
177 0 : }
178 0 : fmt.Fprintf(w, "</head>\n<body>\n")
179 0 : if l.embed {
180 0 : fmt.Fprintf(w, "<script src=\"https://d3js.org/d3.v5.min.js\"></script>\n")
181 0 : } else {
182 0 : fmt.Fprintf(w, "<script src=\"data/d3.v5.min.js\"></script>\n")
183 0 : }
184 0 : fmt.Fprintf(w, "<script type=\"text/javascript\">\n")
185 0 : fmt.Fprintf(w, "data = %s\n", l.formatJSON(l.state))
186 0 : fmt.Fprintf(w, "</script>\n")
187 0 : if l.embed {
188 0 : fmt.Fprintf(w, "<script type=\"text/javascript\">%s</script>\n", lsmDataJS)
189 0 : } else {
190 0 : fmt.Fprintf(w, "<script src=\"data/lsm.js\"></script>\n")
191 0 : }
192 0 : fmt.Fprintf(w, "</body>\n</html>\n")
193 0 :
194 0 : return nil
195 : }
196 :
197 0 : func (l *lsmT) readManifest(path string) []*manifest.VersionEdit {
198 0 : f, err := l.opts.FS.Open(path)
199 0 : if err != nil {
200 0 : fmt.Fprintf(l.Root.OutOrStderr(), "%s\n", err)
201 0 : return nil
202 0 : }
203 0 : defer f.Close()
204 0 :
205 0 : l.state.Manifest = path
206 0 :
207 0 : var edits []*manifest.VersionEdit
208 0 : w := l.Root.OutOrStdout()
209 0 : rr := record.NewReader(f, 0 /* logNum */)
210 0 : for i := 0; ; i++ {
211 0 : r, err := rr.Next()
212 0 : if err != nil {
213 0 : if err != io.EOF {
214 0 : fmt.Fprintf(w, "%s\n", err)
215 0 : }
216 0 : break
217 : }
218 :
219 0 : ve := &manifest.VersionEdit{}
220 0 : err = ve.Decode(r)
221 0 : if err != nil {
222 0 : fmt.Fprintf(w, "%s\n", err)
223 0 : break
224 : }
225 0 : edits = append(edits, ve)
226 0 :
227 0 : if ve.ComparerName != "" {
228 0 : l.cmp = l.comparers[ve.ComparerName]
229 0 : if l.cmp == nil {
230 0 : fmt.Fprintf(w, "%d: unknown comparer %q\n", i, ve.ComparerName)
231 0 : return nil
232 0 : }
233 0 : l.fmtKey.setForComparer(ve.ComparerName, l.comparers)
234 0 : } else if l.cmp == nil {
235 0 : l.cmp = &base.Comparer{}
236 0 : *l.cmp = *base.DefaultComparer
237 0 : l.cmp.FormatKey = l.fmtKey.fn
238 0 : }
239 : }
240 0 : return edits
241 : }
242 :
243 0 : func (l *lsmT) buildKeys(edits []*manifest.VersionEdit) {
244 0 : var keys []base.InternalKey
245 0 : for _, ve := range edits {
246 0 : for i := range ve.NewFiles {
247 0 : nf := &ve.NewFiles[i]
248 0 : keys = append(keys, nf.Meta.Smallest)
249 0 : keys = append(keys, nf.Meta.Largest)
250 0 : }
251 : }
252 :
253 0 : l.keyMap = make(map[lsmKey]int)
254 0 :
255 0 : slices.SortFunc(keys, func(a, b base.InternalKey) int {
256 0 : return base.InternalCompare(l.cmp.Compare, a, b)
257 0 : })
258 :
259 0 : for i := range keys {
260 0 : k := &keys[i]
261 0 : if i > 0 && base.InternalCompare(l.cmp.Compare, keys[i-1], keys[i]) == 0 {
262 0 : continue
263 : }
264 0 : j := len(l.state.Keys)
265 0 : l.state.Keys = append(l.state.Keys, lsmKey{
266 0 : Pretty: fmt.Sprint(l.fmtKey.fn(k.UserKey)),
267 0 : SeqNum: k.SeqNum(),
268 0 : Kind: int(k.Kind()),
269 0 : })
270 0 : l.keyMap[lsmKey{string(k.UserKey), k.SeqNum(), int(k.Kind())}] = j
271 : }
272 : }
273 :
274 0 : func (l *lsmT) buildEdits(edits []*manifest.VersionEdit) error {
275 0 : l.state.Edits = nil
276 0 : l.state.StartEdit = l.startEdit
277 0 : l.state.Files = make(map[base.FileNum]lsmFileMetadata)
278 0 : var currentFiles [manifest.NumLevels][]*manifest.FileMetadata
279 0 :
280 0 : backings := make(map[base.DiskFileNum]*manifest.FileBacking)
281 0 :
282 0 : for _, ve := range edits {
283 0 : for _, i := range ve.CreatedBackingTables {
284 0 : backings[i.DiskFileNum] = i
285 0 : }
286 0 : if len(ve.DeletedFiles) == 0 && len(ve.NewFiles) == 0 {
287 0 : continue
288 : }
289 :
290 0 : edit := lsmVersionEdit{
291 0 : Reason: l.reason(ve),
292 0 : Added: make(map[int][]base.FileNum),
293 0 : Deleted: make(map[int][]base.FileNum),
294 0 : }
295 0 :
296 0 : for j := range ve.NewFiles {
297 0 : nf := &ve.NewFiles[j]
298 0 : if b, ok := backings[nf.BackingFileNum]; ok && nf.Meta.Virtual {
299 0 : nf.Meta.FileBacking = b
300 0 : }
301 0 : if _, ok := l.state.Files[nf.Meta.FileNum]; !ok {
302 0 : l.state.Files[nf.Meta.FileNum] = lsmFileMetadata{
303 0 : Size: nf.Meta.Size,
304 0 : Smallest: l.findKey(nf.Meta.Smallest),
305 0 : Largest: l.findKey(nf.Meta.Largest),
306 0 : SmallestSeqNum: nf.Meta.SmallestSeqNum,
307 0 : LargestSeqNum: nf.Meta.LargestSeqNum,
308 0 : Virtual: nf.Meta.Virtual,
309 0 : }
310 0 : }
311 0 : edit.Added[nf.Level] = append(edit.Added[nf.Level], nf.Meta.FileNum)
312 0 : currentFiles[nf.Level] = append(currentFiles[nf.Level], nf.Meta)
313 : }
314 :
315 0 : for df := range ve.DeletedFiles {
316 0 : edit.Deleted[df.Level] = append(edit.Deleted[df.Level], df.FileNum)
317 0 : for j, f := range currentFiles[df.Level] {
318 0 : if f.FileNum == df.FileNum {
319 0 : copy(currentFiles[df.Level][j:], currentFiles[df.Level][j+1:])
320 0 : currentFiles[df.Level] = currentFiles[df.Level][:len(currentFiles[df.Level])-1]
321 0 : }
322 : }
323 : }
324 :
325 0 : v := manifest.NewVersion(l.cmp, 0, currentFiles)
326 0 : edit.Sublevels = make(map[base.FileNum]int)
327 0 : for sublevel, files := range v.L0SublevelFiles {
328 0 : iter := files.Iter()
329 0 : for f := iter.First(); f != nil; f = iter.Next() {
330 0 : if len(l.state.Edits) > 0 {
331 0 : lastEdit := l.state.Edits[len(l.state.Edits)-1]
332 0 : if sublevel2, ok := lastEdit.Sublevels[f.FileNum]; ok && sublevel == sublevel2 {
333 0 : continue
334 : }
335 : }
336 0 : edit.Sublevels[f.FileNum] = sublevel
337 : }
338 : }
339 0 : l.state.Edits = append(l.state.Edits, edit)
340 : }
341 :
342 0 : if l.state.Edits == nil {
343 0 : return errors.Errorf("there are no edits in [start-edit, end-edit], which add or delete files")
344 0 : }
345 0 : return nil
346 : }
347 :
348 0 : func (l *lsmT) coalesceEdits(edits []*manifest.VersionEdit) ([]*manifest.VersionEdit, error) {
349 0 : if l.startEdit >= int64(len(edits)) {
350 0 : return nil, errors.Errorf("start-edit is more than the number of edits, %d", len(edits))
351 0 : }
352 :
353 0 : be := manifest.BulkVersionEdit{}
354 0 : be.AddedByFileNum = make(map[base.FileNum]*manifest.FileMetadata)
355 0 :
356 0 : // Coalesce all edits from [0, l.startEdit) into a BulkVersionEdit.
357 0 : for _, ve := range edits[:l.startEdit] {
358 0 : err := be.Accumulate(ve)
359 0 : if err != nil {
360 0 : return nil, err
361 0 : }
362 : }
363 :
364 0 : startingEdit := edits[l.startEdit]
365 0 : var beNewFiles []manifest.NewFileEntry
366 0 : beDeletedFiles := make(map[manifest.DeletedFileEntry]*manifest.FileMetadata)
367 0 :
368 0 : for level, deletedFiles := range be.Deleted {
369 0 : for _, file := range deletedFiles {
370 0 : dfe := manifest.DeletedFileEntry{
371 0 : Level: level,
372 0 : FileNum: file.FileNum,
373 0 : }
374 0 : beDeletedFiles[dfe] = file
375 0 : }
376 : }
377 :
378 : // Filter out added files that were also deleted in the BulkVersionEdit.
379 0 : for level, newFiles := range be.Added {
380 0 : for _, file := range newFiles {
381 0 : dfe := manifest.DeletedFileEntry{
382 0 : Level: level,
383 0 : FileNum: file.FileNum,
384 0 : }
385 0 :
386 0 : if _, ok := beDeletedFiles[dfe]; !ok {
387 0 : beNewFiles = append(beNewFiles, manifest.NewFileEntry{
388 0 : Level: level,
389 0 : Meta: file,
390 0 : })
391 0 : }
392 : }
393 : }
394 0 : startingEdit.NewFiles = append(beNewFiles, startingEdit.NewFiles...)
395 0 :
396 0 : edits = edits[l.startEdit:]
397 0 : return edits, nil
398 : }
399 :
400 0 : func (l *lsmT) findKey(key base.InternalKey) int {
401 0 : return l.keyMap[lsmKey{string(key.UserKey), key.SeqNum(), int(key.Kind())}]
402 0 : }
403 :
404 0 : func (l *lsmT) reason(ve *manifest.VersionEdit) string {
405 0 : if len(ve.DeletedFiles) > 0 {
406 0 : return "compacted"
407 0 : }
408 0 : if ve.MinUnflushedLogNum != 0 {
409 0 : return "flushed"
410 0 : }
411 0 : for i := range ve.NewFiles {
412 0 : nf := &ve.NewFiles[i]
413 0 : if nf.Meta.SmallestSeqNum == nf.Meta.LargestSeqNum {
414 0 : return "ingested"
415 0 : }
416 : }
417 0 : return "added"
418 : }
419 :
420 0 : func (l *lsmT) formatJSON(v interface{}) string {
421 0 : if l.pretty {
422 0 : return l.prettyJSON(v)
423 0 : }
424 0 : return l.uglyJSON(v)
425 : }
426 :
427 0 : func (l *lsmT) uglyJSON(v interface{}) string {
428 0 : data, err := json.Marshal(v)
429 0 : if err != nil {
430 0 : log.Fatal(err)
431 0 : }
432 0 : return string(data)
433 : }
434 :
435 0 : func (l *lsmT) prettyJSON(v interface{}) string {
436 0 : data, err := json.MarshalIndent(v, "", "\t")
437 0 : if err != nil {
438 0 : log.Fatal(err)
439 0 : }
440 0 : return string(data)
441 : }
|