Line data Source code
1 : // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package metamorphic
6 :
7 : import (
8 : "bytes"
9 : "context"
10 : "crypto/rand"
11 : "encoding/binary"
12 : "fmt"
13 : "io"
14 : "path"
15 : "path/filepath"
16 : "slices"
17 : "strings"
18 :
19 : "github.com/cockroachdb/errors"
20 : "github.com/cockroachdb/pebble"
21 : "github.com/cockroachdb/pebble/internal/base"
22 : "github.com/cockroachdb/pebble/internal/keyspan"
23 : "github.com/cockroachdb/pebble/internal/private"
24 : "github.com/cockroachdb/pebble/internal/rangekey"
25 : "github.com/cockroachdb/pebble/internal/testkeys"
26 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
27 : "github.com/cockroachdb/pebble/sstable"
28 : "github.com/cockroachdb/pebble/vfs"
29 : "github.com/cockroachdb/pebble/vfs/errorfs"
30 : )
31 :
32 : // Ops holds a sequence of operations to be executed by the metamorphic tests.
33 : type Ops []op
34 :
35 : // op defines the interface for a single operation, such as creating a batch,
36 : // or advancing an iterator.
37 : type op interface {
38 : String() string
39 :
40 : run(t *Test, h historyRecorder)
41 :
42 : // receiver returns the object ID of the object the operation is performed
43 : // on. Every operation has a receiver (eg, batch0.Set(...) has `batch0` as
44 : // its receiver). Receivers are used for synchronization when running with
45 : // concurrency.
46 : receiver() objID
47 :
48 : // syncObjs returns an additional set of object IDs—excluding the
49 : // receiver—that the operation must synchronize with. At execution time,
50 : // the operation will run serially with respect to all other operations
51 : // that return these objects from their own syncObjs or receiver methods.
52 : syncObjs() objIDSlice
53 :
54 : // keys returns all user keys used by the operation, as pointers to slices.
55 : // The caller can then modify these slices to rewrite the keys.
56 : //
57 : // Used for simplification of operations for easier investigations.
58 : keys() []*[]byte
59 :
60 : // diagramKeyRanges() returns key spans associated with this operation, to be
61 : // shown on an ASCII diagram of operations.
62 : diagramKeyRanges() []pebble.KeyRange
63 : }
64 :
65 : // initOp performs test initialization
66 : type initOp struct {
67 : dbSlots uint32
68 : batchSlots uint32
69 : iterSlots uint32
70 : snapshotSlots uint32
71 : externalObjSlots uint32
72 : }
73 :
74 1 : func (o *initOp) run(t *Test, h historyRecorder) {
75 1 : t.batches = make([]*pebble.Batch, o.batchSlots)
76 1 : t.iters = make([]*retryableIter, o.iterSlots)
77 1 : t.snapshots = make([]readerCloser, o.snapshotSlots)
78 1 : t.externalObjs = make([]externalObjMeta, o.externalObjSlots)
79 1 : h.Recordf("%s", o)
80 1 : }
81 :
82 1 : func (o *initOp) String() string {
83 1 : return fmt.Sprintf("Init(%d /* dbs */, %d /* batches */, %d /* iters */, %d /* snapshots */, %d /* externalObjs */)",
84 1 : o.dbSlots, o.batchSlots, o.iterSlots, o.snapshotSlots, o.externalObjSlots)
85 1 : }
86 :
87 1 : func (o *initOp) receiver() objID { return makeObjID(dbTag, 1) }
88 1 : func (o *initOp) syncObjs() objIDSlice {
89 1 : syncObjs := make([]objID, 0)
90 1 : // Add any additional DBs to syncObjs.
91 1 : for i := uint32(2); i < o.dbSlots+1; i++ {
92 0 : syncObjs = append(syncObjs, makeObjID(dbTag, i))
93 0 : }
94 1 : return syncObjs
95 : }
96 :
97 0 : func (o *initOp) keys() []*[]byte { return nil }
98 1 : func (o *initOp) diagramKeyRanges() []pebble.KeyRange { return nil }
99 :
100 : // applyOp models a Writer.Apply operation.
101 : type applyOp struct {
102 : writerID objID
103 : batchID objID
104 : }
105 :
106 1 : func (o *applyOp) run(t *Test, h historyRecorder) {
107 1 : b := t.getBatch(o.batchID)
108 1 : w := t.getWriter(o.writerID)
109 1 : var err error
110 1 : if o.writerID.tag() == dbTag && t.testOpts.asyncApplyToDB && t.writeOpts.Sync {
111 0 : err = w.(*pebble.DB).ApplyNoSyncWait(b, t.writeOpts)
112 0 : if err == nil {
113 0 : err = b.SyncWait()
114 0 : }
115 1 : } else {
116 1 : err = w.Apply(b, t.writeOpts)
117 1 : }
118 1 : h.Recordf("%s // %v", o, err)
119 : // batch will be closed by a closeOp which is guaranteed to be generated
120 : }
121 :
122 1 : func (o *applyOp) String() string { return fmt.Sprintf("%s.Apply(%s)", o.writerID, o.batchID) }
123 1 : func (o *applyOp) receiver() objID { return o.writerID }
124 1 : func (o *applyOp) syncObjs() objIDSlice {
125 1 : // Apply should not be concurrent with operations that are mutating the
126 1 : // batch.
127 1 : return []objID{o.batchID}
128 1 : }
129 :
130 0 : func (o *applyOp) keys() []*[]byte { return nil }
131 0 : func (o *applyOp) diagramKeyRanges() []pebble.KeyRange { return nil }
132 :
133 : // checkpointOp models a DB.Checkpoint operation.
134 : type checkpointOp struct {
135 : dbID objID
136 : // If non-empty, the checkpoint is restricted to these spans.
137 : spans []pebble.CheckpointSpan
138 : }
139 :
140 1 : func (o *checkpointOp) run(t *Test, h historyRecorder) {
141 1 : // TODO(josh): db.Checkpoint does not work with shared storage yet.
142 1 : // It would be better to filter out ahead of calling run on the op,
143 1 : // by setting the weight that generator.go uses to zero, or similar.
144 1 : // But IIUC the ops are shared for ALL the metamorphic test runs, so
145 1 : // not sure how to do that easily:
146 1 : // https://github.com/cockroachdb/pebble/blob/master/metamorphic/meta.go#L177
147 1 : if t.testOpts.sharedStorageEnabled || t.testOpts.externalStorageEnabled {
148 1 : h.Recordf("%s // %v", o, nil)
149 1 : return
150 1 : }
151 1 : var opts []pebble.CheckpointOption
152 1 : if len(o.spans) > 0 {
153 1 : opts = append(opts, pebble.WithRestrictToSpans(o.spans))
154 1 : }
155 1 : db := t.getDB(o.dbID)
156 1 : err := t.withRetries(func() error {
157 1 : return db.Checkpoint(o.dir(t.dir, h.op), opts...)
158 1 : })
159 1 : h.Recordf("%s // %v", o, err)
160 : }
161 :
162 1 : func (o *checkpointOp) dir(dataDir string, idx int) string {
163 1 : return filepath.Join(dataDir, "checkpoints", fmt.Sprintf("op-%06d", idx))
164 1 : }
165 :
166 1 : func (o *checkpointOp) String() string {
167 1 : var spanStr bytes.Buffer
168 1 : for i, span := range o.spans {
169 1 : if i > 0 {
170 1 : spanStr.WriteString(",")
171 1 : }
172 1 : fmt.Fprintf(&spanStr, "%q,%q", span.Start, span.End)
173 : }
174 1 : return fmt.Sprintf("%s.Checkpoint(%s)", o.dbID, spanStr.String())
175 : }
176 :
177 1 : func (o *checkpointOp) receiver() objID { return o.dbID }
178 1 : func (o *checkpointOp) syncObjs() objIDSlice { return nil }
179 :
180 0 : func (o *checkpointOp) keys() []*[]byte {
181 0 : var res []*[]byte
182 0 : for i := range o.spans {
183 0 : res = append(res, &o.spans[i].Start, &o.spans[i].End)
184 0 : }
185 0 : return res
186 : }
187 :
188 0 : func (o *checkpointOp) diagramKeyRanges() []pebble.KeyRange {
189 0 : var res []pebble.KeyRange
190 0 : for i := range o.spans {
191 0 : res = append(res, pebble.KeyRange{
192 0 : Start: o.spans[i].Start,
193 0 : End: o.spans[i].End,
194 0 : })
195 0 : }
196 0 : return res
197 : }
198 :
199 : // downloadOp models a DB.Download operation.
200 : type downloadOp struct {
201 : dbID objID
202 : spans []pebble.DownloadSpan
203 : }
204 :
205 1 : func (o *downloadOp) run(t *Test, h historyRecorder) {
206 1 : db := t.getDB(o.dbID)
207 1 : err := t.withRetries(func() error {
208 1 : return db.Download(context.Background(), o.spans)
209 1 : })
210 1 : h.Recordf("%s // %v", o, err)
211 : }
212 :
213 1 : func (o *downloadOp) String() string {
214 1 : var spanStr bytes.Buffer
215 1 : for i, span := range o.spans {
216 1 : if i > 0 {
217 1 : spanStr.WriteString(", ")
218 1 : }
219 1 : fmt.Fprintf(&spanStr, "%q /* start */, %q /* end */, %v /* viaBackingFileDownload */",
220 1 : span.StartKey, span.EndKey, span.ViaBackingFileDownload)
221 : }
222 1 : return fmt.Sprintf("%s.Download(%s)", o.dbID, spanStr.String())
223 : }
224 :
225 1 : func (o *downloadOp) receiver() objID { return o.dbID }
226 1 : func (o downloadOp) syncObjs() objIDSlice { return nil }
227 :
228 0 : func (o *downloadOp) keys() []*[]byte {
229 0 : var res []*[]byte
230 0 : for i := range o.spans {
231 0 : res = append(res, &o.spans[i].StartKey, &o.spans[i].EndKey)
232 0 : }
233 0 : return res
234 : }
235 :
236 0 : func (o *downloadOp) diagramKeyRanges() []pebble.KeyRange {
237 0 : var res []pebble.KeyRange
238 0 : for i := range o.spans {
239 0 : res = append(res, pebble.KeyRange{
240 0 : Start: o.spans[i].StartKey,
241 0 : End: o.spans[i].EndKey,
242 0 : })
243 0 : }
244 0 : return res
245 : }
246 :
247 : // closeOp models a {Batch,Iterator,Snapshot}.Close operation.
248 : type closeOp struct {
249 : objID objID
250 :
251 : // affectedObjects is the list of additional objects that are affected by this
252 : // operation, and which syncObjs() must return so that we don't perform the
253 : // close in parallel with other operations to affected objects.
254 : affectedObjects []objID
255 : }
256 :
257 1 : func (o *closeOp) run(t *Test, h historyRecorder) {
258 1 : c := t.getCloser(o.objID)
259 1 : if o.objID.tag() == dbTag && t.opts.DisableWAL {
260 1 : // Special case: If WAL is disabled, do a flush right before DB Close. This
261 1 : // allows us to reuse this run's data directory as initial state for
262 1 : // future runs without losing any mutations.
263 1 : _ = t.getDB(o.objID).Flush()
264 1 : }
265 1 : t.clearObj(o.objID)
266 1 : err := c.Close()
267 1 : h.Recordf("%s // %v", o, err)
268 : }
269 :
270 1 : func (o *closeOp) String() string { return fmt.Sprintf("%s.Close()", o.objID) }
271 1 : func (o *closeOp) receiver() objID { return o.objID }
272 1 : func (o *closeOp) syncObjs() objIDSlice {
273 1 : return o.affectedObjects
274 1 : }
275 :
276 0 : func (o *closeOp) keys() []*[]byte { return nil }
277 0 : func (o *closeOp) diagramKeyRanges() []pebble.KeyRange { return nil }
278 :
279 : // compactOp models a DB.Compact operation.
280 : type compactOp struct {
281 : dbID objID
282 : start []byte
283 : end []byte
284 : parallelize bool
285 : }
286 :
287 1 : func (o *compactOp) run(t *Test, h historyRecorder) {
288 1 : err := t.withRetries(func() error {
289 1 : return t.getDB(o.dbID).Compact(o.start, o.end, o.parallelize)
290 1 : })
291 1 : h.Recordf("%s // %v", o, err)
292 : }
293 :
294 1 : func (o *compactOp) String() string {
295 1 : return fmt.Sprintf("%s.Compact(%q, %q, %t /* parallelize */)", o.dbID, o.start, o.end, o.parallelize)
296 1 : }
297 :
298 1 : func (o *compactOp) receiver() objID { return o.dbID }
299 1 : func (o *compactOp) syncObjs() objIDSlice { return nil }
300 :
301 1 : func (o *compactOp) keys() []*[]byte {
302 1 : return []*[]byte{&o.start, &o.end}
303 1 : }
304 :
305 1 : func (o *compactOp) diagramKeyRanges() []pebble.KeyRange {
306 1 : return []pebble.KeyRange{{Start: o.start, End: o.end}}
307 1 : }
308 :
309 : // deleteOp models a Write.Delete operation.
310 : type deleteOp struct {
311 : writerID objID
312 : key []byte
313 :
314 : derivedDBID objID
315 : }
316 :
317 1 : func (o *deleteOp) run(t *Test, h historyRecorder) {
318 1 : w := t.getWriter(o.writerID)
319 1 : var err error
320 1 : if t.testOpts.deleteSized && t.isFMV(o.derivedDBID, pebble.FormatDeleteSizedAndObsolete) {
321 1 : // Call DeleteSized with a deterministic size derived from the index.
322 1 : // The size does not need to be accurate for correctness.
323 1 : err = w.DeleteSized(o.key, hashSize(t.idx), t.writeOpts)
324 1 : } else {
325 1 : err = w.Delete(o.key, t.writeOpts)
326 1 : }
327 1 : h.Recordf("%s // %v", o, err)
328 : }
329 :
330 1 : func hashSize(index int) uint32 {
331 1 : // Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
332 1 : return uint32((11400714819323198485 * uint64(index)) % maxValueSize)
333 1 : }
334 :
335 1 : func (o *deleteOp) String() string {
336 1 : return fmt.Sprintf("%s.Delete(%q)", o.writerID, o.key)
337 1 : }
338 1 : func (o *deleteOp) receiver() objID { return o.writerID }
339 1 : func (o *deleteOp) syncObjs() objIDSlice { return nil }
340 :
341 0 : func (o *deleteOp) keys() []*[]byte {
342 0 : return []*[]byte{&o.key}
343 0 : }
344 :
345 0 : func (o *deleteOp) diagramKeyRanges() []pebble.KeyRange {
346 0 : return []pebble.KeyRange{{Start: o.key, End: o.key}}
347 0 : }
348 :
349 : // singleDeleteOp models a Write.SingleDelete operation.
350 : type singleDeleteOp struct {
351 : writerID objID
352 : key []byte
353 : maybeReplaceDelete bool
354 : }
355 :
356 1 : func (o *singleDeleteOp) run(t *Test, h historyRecorder) {
357 1 : w := t.getWriter(o.writerID)
358 1 : var err error
359 1 : if t.testOpts.replaceSingleDelete && o.maybeReplaceDelete {
360 1 : err = w.Delete(o.key, t.writeOpts)
361 1 : } else {
362 1 : err = w.SingleDelete(o.key, t.writeOpts)
363 1 : }
364 : // NOTE: even if the SINGLEDEL was replaced with a DELETE, we must still
365 : // write the former to the history log. The log line will indicate whether
366 : // or not the delete *could* have been replaced. The OPTIONS file should
367 : // also be consulted to determine what happened at runtime (i.e. by taking
368 : // the logical AND).
369 1 : h.Recordf("%s // %v", o, err)
370 : }
371 :
372 1 : func (o *singleDeleteOp) String() string {
373 1 : return fmt.Sprintf("%s.SingleDelete(%q, %v /* maybeReplaceDelete */)", o.writerID, o.key, o.maybeReplaceDelete)
374 1 : }
375 :
376 1 : func (o *singleDeleteOp) receiver() objID { return o.writerID }
377 1 : func (o *singleDeleteOp) syncObjs() objIDSlice { return nil }
378 :
379 0 : func (o *singleDeleteOp) keys() []*[]byte {
380 0 : return []*[]byte{&o.key}
381 0 : }
382 :
383 0 : func (o *singleDeleteOp) diagramKeyRanges() []pebble.KeyRange {
384 0 : return []pebble.KeyRange{{Start: o.key, End: o.key}}
385 0 : }
386 :
387 : // deleteRangeOp models a Write.DeleteRange operation.
388 : type deleteRangeOp struct {
389 : writerID objID
390 : start []byte
391 : end []byte
392 : }
393 :
394 1 : func (o *deleteRangeOp) run(t *Test, h historyRecorder) {
395 1 : w := t.getWriter(o.writerID)
396 1 : err := w.DeleteRange(o.start, o.end, t.writeOpts)
397 1 : h.Recordf("%s // %v", o, err)
398 1 : }
399 :
400 1 : func (o *deleteRangeOp) String() string {
401 1 : return fmt.Sprintf("%s.DeleteRange(%q, %q)", o.writerID, o.start, o.end)
402 1 : }
403 :
404 1 : func (o *deleteRangeOp) receiver() objID { return o.writerID }
405 1 : func (o *deleteRangeOp) syncObjs() objIDSlice { return nil }
406 :
407 0 : func (o *deleteRangeOp) keys() []*[]byte {
408 0 : return []*[]byte{&o.start, &o.end}
409 0 : }
410 :
411 0 : func (o *deleteRangeOp) diagramKeyRanges() []pebble.KeyRange {
412 0 : return []pebble.KeyRange{{Start: o.start, End: o.end}}
413 0 : }
414 :
415 : // flushOp models a DB.Flush operation.
416 : type flushOp struct {
417 : db objID
418 : }
419 :
420 1 : func (o *flushOp) run(t *Test, h historyRecorder) {
421 1 : db := t.getDB(o.db)
422 1 : err := db.Flush()
423 1 : h.Recordf("%s // %v", o, err)
424 1 : }
425 :
426 1 : func (o *flushOp) String() string { return fmt.Sprintf("%s.Flush()", o.db) }
427 1 : func (o *flushOp) receiver() objID { return o.db }
428 1 : func (o *flushOp) syncObjs() objIDSlice { return nil }
429 0 : func (o *flushOp) keys() []*[]byte { return nil }
430 0 : func (o *flushOp) diagramKeyRanges() []pebble.KeyRange { return nil }
431 :
432 : // mergeOp models a Write.Merge operation.
433 : type mergeOp struct {
434 : writerID objID
435 : key []byte
436 : value []byte
437 : }
438 :
439 1 : func (o *mergeOp) run(t *Test, h historyRecorder) {
440 1 : w := t.getWriter(o.writerID)
441 1 : err := w.Merge(o.key, o.value, t.writeOpts)
442 1 : h.Recordf("%s // %v", o, err)
443 1 : }
444 :
445 1 : func (o *mergeOp) String() string { return fmt.Sprintf("%s.Merge(%q, %q)", o.writerID, o.key, o.value) }
446 1 : func (o *mergeOp) receiver() objID { return o.writerID }
447 1 : func (o *mergeOp) syncObjs() objIDSlice { return nil }
448 :
449 0 : func (o *mergeOp) keys() []*[]byte {
450 0 : return []*[]byte{&o.key}
451 0 : }
452 :
453 0 : func (o *mergeOp) diagramKeyRanges() []pebble.KeyRange {
454 0 : return []pebble.KeyRange{{Start: o.key, End: o.key}}
455 0 : }
456 :
457 : // setOp models a Write.Set operation.
458 : type setOp struct {
459 : writerID objID
460 : key []byte
461 : value []byte
462 : }
463 :
464 1 : func (o *setOp) run(t *Test, h historyRecorder) {
465 1 : w := t.getWriter(o.writerID)
466 1 : err := w.Set(o.key, o.value, t.writeOpts)
467 1 : h.Recordf("%s // %v", o, err)
468 1 : }
469 :
470 1 : func (o *setOp) String() string { return fmt.Sprintf("%s.Set(%q, %q)", o.writerID, o.key, o.value) }
471 1 : func (o *setOp) receiver() objID { return o.writerID }
472 1 : func (o *setOp) syncObjs() objIDSlice { return nil }
473 :
474 0 : func (o *setOp) keys() []*[]byte {
475 0 : return []*[]byte{&o.key}
476 0 : }
477 :
478 0 : func (o *setOp) diagramKeyRanges() []pebble.KeyRange {
479 0 : return []pebble.KeyRange{{Start: o.key, End: o.key}}
480 0 : }
481 :
482 : // rangeKeyDeleteOp models a Write.RangeKeyDelete operation.
483 : type rangeKeyDeleteOp struct {
484 : writerID objID
485 : start []byte
486 : end []byte
487 : }
488 :
489 1 : func (o *rangeKeyDeleteOp) run(t *Test, h historyRecorder) {
490 1 : w := t.getWriter(o.writerID)
491 1 : err := w.RangeKeyDelete(o.start, o.end, t.writeOpts)
492 1 : h.Recordf("%s // %v", o, err)
493 1 : }
494 :
495 1 : func (o *rangeKeyDeleteOp) String() string {
496 1 : return fmt.Sprintf("%s.RangeKeyDelete(%q, %q)", o.writerID, o.start, o.end)
497 1 : }
498 :
499 1 : func (o *rangeKeyDeleteOp) receiver() objID { return o.writerID }
500 1 : func (o *rangeKeyDeleteOp) syncObjs() objIDSlice { return nil }
501 :
502 0 : func (o *rangeKeyDeleteOp) keys() []*[]byte {
503 0 : return []*[]byte{&o.start, &o.end}
504 0 : }
505 :
506 1 : func (o *rangeKeyDeleteOp) diagramKeyRanges() []pebble.KeyRange {
507 1 : return []pebble.KeyRange{{Start: o.start, End: o.end}}
508 1 : }
509 :
510 : // rangeKeySetOp models a Write.RangeKeySet operation.
511 : type rangeKeySetOp struct {
512 : writerID objID
513 : start []byte
514 : end []byte
515 : suffix []byte
516 : value []byte
517 : }
518 :
519 1 : func (o *rangeKeySetOp) run(t *Test, h historyRecorder) {
520 1 : w := t.getWriter(o.writerID)
521 1 : err := w.RangeKeySet(o.start, o.end, o.suffix, o.value, t.writeOpts)
522 1 : h.Recordf("%s // %v", o, err)
523 1 : }
524 :
525 1 : func (o *rangeKeySetOp) String() string {
526 1 : return fmt.Sprintf("%s.RangeKeySet(%q, %q, %q, %q)",
527 1 : o.writerID, o.start, o.end, o.suffix, o.value)
528 1 : }
529 :
530 1 : func (o *rangeKeySetOp) receiver() objID { return o.writerID }
531 1 : func (o *rangeKeySetOp) syncObjs() objIDSlice { return nil }
532 :
533 1 : func (o *rangeKeySetOp) keys() []*[]byte {
534 1 : return []*[]byte{&o.start, &o.end}
535 1 : }
536 :
537 1 : func (o *rangeKeySetOp) diagramKeyRanges() []pebble.KeyRange {
538 1 : return []pebble.KeyRange{{Start: o.start, End: o.end}}
539 1 : }
540 :
541 : // rangeKeyUnsetOp models a Write.RangeKeyUnset operation.
542 : type rangeKeyUnsetOp struct {
543 : writerID objID
544 : start []byte
545 : end []byte
546 : suffix []byte
547 : }
548 :
549 1 : func (o *rangeKeyUnsetOp) run(t *Test, h historyRecorder) {
550 1 : w := t.getWriter(o.writerID)
551 1 : err := w.RangeKeyUnset(o.start, o.end, o.suffix, t.writeOpts)
552 1 : h.Recordf("%s // %v", o, err)
553 1 : }
554 :
555 1 : func (o *rangeKeyUnsetOp) String() string {
556 1 : return fmt.Sprintf("%s.RangeKeyUnset(%q, %q, %q)",
557 1 : o.writerID, o.start, o.end, o.suffix)
558 1 : }
559 :
560 1 : func (o *rangeKeyUnsetOp) receiver() objID { return o.writerID }
561 1 : func (o *rangeKeyUnsetOp) syncObjs() objIDSlice { return nil }
562 :
563 0 : func (o *rangeKeyUnsetOp) keys() []*[]byte {
564 0 : return []*[]byte{&o.start, &o.end}
565 0 : }
566 :
567 0 : func (o *rangeKeyUnsetOp) diagramKeyRanges() []pebble.KeyRange {
568 0 : return []pebble.KeyRange{{Start: o.start, End: o.end}}
569 0 : }
570 :
571 : // logDataOp models a Writer.LogData operation.
572 : type logDataOp struct {
573 : writerID objID
574 : data []byte
575 : }
576 :
577 1 : func (o *logDataOp) run(t *Test, h historyRecorder) {
578 1 : w := t.getWriter(o.writerID)
579 1 : err := w.LogData(o.data, t.writeOpts)
580 1 : h.Recordf("%s // %v", o, err)
581 1 : }
582 :
583 1 : func (o *logDataOp) String() string {
584 1 : return fmt.Sprintf("%s.LogData(%q)", o.writerID, o.data)
585 1 : }
586 :
587 1 : func (o *logDataOp) receiver() objID { return o.writerID }
588 1 : func (o *logDataOp) syncObjs() objIDSlice { return nil }
589 0 : func (o *logDataOp) keys() []*[]byte { return []*[]byte{} }
590 0 : func (o *logDataOp) diagramKeyRanges() []pebble.KeyRange { return []pebble.KeyRange{} }
591 :
592 : // newBatchOp models a Write.NewBatch operation.
593 : type newBatchOp struct {
594 : dbID objID
595 : batchID objID
596 : }
597 :
598 1 : func (o *newBatchOp) run(t *Test, h historyRecorder) {
599 1 : b := t.getDB(o.dbID).NewBatch()
600 1 : t.setBatch(o.batchID, b)
601 1 : h.Recordf("%s", o)
602 1 : }
603 :
604 1 : func (o *newBatchOp) String() string { return fmt.Sprintf("%s = %s.NewBatch()", o.batchID, o.dbID) }
605 1 : func (o *newBatchOp) receiver() objID { return o.dbID }
606 1 : func (o *newBatchOp) syncObjs() objIDSlice {
607 1 : // NewBatch should not be concurrent with operations that interact with that
608 1 : // same batch.
609 1 : return []objID{o.batchID}
610 1 : }
611 :
612 0 : func (o *newBatchOp) keys() []*[]byte { return nil }
613 0 : func (o *newBatchOp) diagramKeyRanges() []pebble.KeyRange { return nil }
614 :
615 : // newIndexedBatchOp models a Write.NewIndexedBatch operation.
616 : type newIndexedBatchOp struct {
617 : dbID objID
618 : batchID objID
619 : }
620 :
621 1 : func (o *newIndexedBatchOp) run(t *Test, h historyRecorder) {
622 1 : b := t.getDB(o.dbID).NewIndexedBatch()
623 1 : t.setBatch(o.batchID, b)
624 1 : h.Recordf("%s", o)
625 1 : }
626 :
627 1 : func (o *newIndexedBatchOp) String() string {
628 1 : return fmt.Sprintf("%s = %s.NewIndexedBatch()", o.batchID, o.dbID)
629 1 : }
630 1 : func (o *newIndexedBatchOp) receiver() objID { return o.dbID }
631 1 : func (o *newIndexedBatchOp) syncObjs() objIDSlice {
632 1 : // NewIndexedBatch should not be concurrent with operations that interact
633 1 : // with that same batch.
634 1 : return []objID{o.batchID}
635 1 : }
636 :
637 0 : func (o *newIndexedBatchOp) keys() []*[]byte { return nil }
638 0 : func (o *newIndexedBatchOp) diagramKeyRanges() []pebble.KeyRange { return nil }
639 :
640 : // batchCommitOp models a Batch.Commit operation.
641 : type batchCommitOp struct {
642 : dbID objID
643 : batchID objID
644 : }
645 :
646 1 : func (o *batchCommitOp) run(t *Test, h historyRecorder) {
647 1 : b := t.getBatch(o.batchID)
648 1 : err := b.Commit(t.writeOpts)
649 1 : h.Recordf("%s // %v", o, err)
650 1 : }
651 :
652 1 : func (o *batchCommitOp) String() string { return fmt.Sprintf("%s.Commit()", o.batchID) }
653 1 : func (o *batchCommitOp) receiver() objID { return o.batchID }
654 1 : func (o *batchCommitOp) syncObjs() objIDSlice {
655 1 : // Synchronize on the database so that NewIters wait for the commit.
656 1 : return []objID{o.dbID}
657 1 : }
658 :
659 0 : func (o *batchCommitOp) keys() []*[]byte { return nil }
660 0 : func (o *batchCommitOp) diagramKeyRanges() []pebble.KeyRange { return nil }
661 :
662 : // ingestOp models a DB.Ingest operation.
663 : type ingestOp struct {
664 : dbID objID
665 : batchIDs []objID
666 :
667 : derivedDBIDs []objID
668 : }
669 :
670 1 : func (o *ingestOp) run(t *Test, h historyRecorder) {
671 1 : // We can only use apply as an alternative for ingestion if we are ingesting
672 1 : // a single batch. If we are ingesting multiple batches, the batches may
673 1 : // overlap which would cause ingestion to fail but apply would succeed.
674 1 : if t.testOpts.ingestUsingApply && len(o.batchIDs) == 1 && o.derivedDBIDs[0] == o.dbID {
675 1 : id := o.batchIDs[0]
676 1 : b := t.getBatch(id)
677 1 : iter, rangeDelIter, rangeKeyIter := private.BatchSort(b)
678 1 : db := t.getDB(o.dbID)
679 1 : c, err := o.collapseBatch(t, db, iter, rangeDelIter, rangeKeyIter, b)
680 1 : if err == nil {
681 1 : err = db.Apply(c, t.writeOpts)
682 1 : }
683 1 : _ = b.Close()
684 1 : _ = c.Close()
685 1 : t.clearObj(id)
686 1 : h.Recordf("%s // %v", o, err)
687 1 : return
688 : }
689 :
690 1 : var paths []string
691 1 : var err error
692 1 : for i, id := range o.batchIDs {
693 1 : b := t.getBatch(id)
694 1 : t.clearObj(id)
695 1 : path, _, err2 := buildForIngest(t, o.dbID, b, i)
696 1 : if err2 != nil {
697 0 : h.Recordf("Build(%s) // %v", id, err2)
698 0 : }
699 1 : err = firstError(err, err2)
700 1 : if err2 == nil {
701 1 : paths = append(paths, path)
702 1 : }
703 1 : err = firstError(err, b.Close())
704 : }
705 :
706 1 : err = firstError(err, t.withRetries(func() error {
707 1 : return t.getDB(o.dbID).Ingest(context.Background(), paths)
708 1 : }))
709 :
710 1 : h.Recordf("%s // %v", o, err)
711 : }
712 :
713 1 : func (o *ingestOp) receiver() objID { return o.dbID }
714 1 : func (o *ingestOp) syncObjs() objIDSlice {
715 1 : // Ingest should not be concurrent with mutating the batches that will be
716 1 : // ingested as sstables.
717 1 : objs := make([]objID, 0, len(o.batchIDs)+1)
718 1 : objs = append(objs, o.batchIDs...)
719 1 : addedDBs := make(map[objID]struct{})
720 1 : for i := range o.derivedDBIDs {
721 1 : _, ok := addedDBs[o.derivedDBIDs[i]]
722 1 : if !ok && o.derivedDBIDs[i] != o.dbID {
723 0 : objs = append(objs, o.derivedDBIDs[i])
724 0 : addedDBs[o.derivedDBIDs[i]] = struct{}{}
725 0 : }
726 : }
727 1 : return objs
728 : }
729 :
730 : func closeIters(
731 : pointIter base.InternalIterator,
732 : rangeDelIter keyspan.FragmentIterator,
733 : rangeKeyIter keyspan.FragmentIterator,
734 1 : ) {
735 1 : if pointIter != nil {
736 1 : pointIter.Close()
737 1 : }
738 1 : if rangeDelIter != nil {
739 1 : rangeDelIter.Close()
740 1 : }
741 1 : if rangeKeyIter != nil {
742 1 : rangeKeyIter.Close()
743 1 : }
744 : }
745 :
746 : // collapseBatch collapses the mutations in a batch to be equivalent to an
747 : // sstable ingesting those mutations. Duplicate updates to a key are collapsed
748 : // so that only the latest update is performed. All range deletions are
749 : // performed first in the batch to match the semantics of ingestion where a
750 : // range deletion does not delete a point record contained in the sstable.
751 : func (o *ingestOp) collapseBatch(
752 : t *Test,
753 : db *pebble.DB,
754 : pointIter base.InternalIterator,
755 : rangeDelIter, rangeKeyIter keyspan.FragmentIterator,
756 : b *pebble.Batch,
757 1 : ) (*pebble.Batch, error) {
758 1 : defer closeIters(pointIter, rangeDelIter, rangeKeyIter)
759 1 : equal := t.opts.Comparer.Equal
760 1 : collapsed := db.NewBatch()
761 1 :
762 1 : if rangeDelIter != nil {
763 1 : // NB: The range tombstones have already been fragmented by the Batch.
764 1 : t, err := rangeDelIter.First()
765 1 : for ; t != nil; t, err = rangeDelIter.Next() {
766 1 : // NB: We don't have to copy the key or value since we're reading from a
767 1 : // batch which doesn't do prefix compression.
768 1 : if err := collapsed.DeleteRange(t.Start, t.End, nil); err != nil {
769 0 : return nil, err
770 0 : }
771 : }
772 1 : if err != nil {
773 0 : return nil, err
774 0 : }
775 1 : rangeDelIter.Close()
776 1 : rangeDelIter = nil
777 : }
778 :
779 1 : if pointIter != nil {
780 1 : var lastUserKey []byte
781 1 : for kv := pointIter.First(); kv != nil; kv = pointIter.Next() {
782 1 : // Ignore duplicate keys.
783 1 : //
784 1 : // Note: this is necessary due to MERGE keys, otherwise it would be
785 1 : // fine to include all the keys in the batch and let the normal
786 1 : // sequence number precedence determine which of the keys "wins".
787 1 : // But the code to build the ingested sstable will only keep the
788 1 : // most recent internal key and will not merge across internal keys.
789 1 : if equal(lastUserKey, kv.K.UserKey) {
790 1 : continue
791 : }
792 : // NB: We don't have to copy the key or value since we're reading from a
793 : // batch which doesn't do prefix compression.
794 1 : lastUserKey = kv.K.UserKey
795 1 :
796 1 : var err error
797 1 : switch kv.Kind() {
798 1 : case pebble.InternalKeyKindDelete:
799 1 : err = collapsed.Delete(kv.K.UserKey, nil)
800 1 : case pebble.InternalKeyKindDeleteSized:
801 1 : v, _ := binary.Uvarint(kv.InPlaceValue())
802 1 : // Batch.DeleteSized takes just the length of the value being
803 1 : // deleted and adds the key's length to derive the overall entry
804 1 : // size of the value being deleted. This has already been done
805 1 : // to the key we're reading from the batch, so we must subtract
806 1 : // the key length from the encoded value before calling
807 1 : // collapsed.DeleteSized, which will again add the key length
808 1 : // before encoding.
809 1 : err = collapsed.DeleteSized(kv.K.UserKey, uint32(v-uint64(len(kv.K.UserKey))), nil)
810 1 : case pebble.InternalKeyKindSingleDelete:
811 1 : err = collapsed.SingleDelete(kv.K.UserKey, nil)
812 1 : case pebble.InternalKeyKindSet:
813 1 : err = collapsed.Set(kv.K.UserKey, kv.InPlaceValue(), nil)
814 1 : case pebble.InternalKeyKindMerge:
815 1 : err = collapsed.Merge(kv.K.UserKey, kv.InPlaceValue(), nil)
816 0 : case pebble.InternalKeyKindLogData:
817 0 : err = collapsed.LogData(kv.K.UserKey, nil)
818 0 : default:
819 0 : err = errors.Errorf("unknown batch record kind: %d", kv.Kind())
820 : }
821 1 : if err != nil {
822 0 : return nil, err
823 0 : }
824 : }
825 1 : if err := pointIter.Close(); err != nil {
826 0 : return nil, err
827 0 : }
828 1 : pointIter = nil
829 : }
830 :
831 : // There's no equivalent of a MERGE operator for range keys, so there's no
832 : // need to collapse the range keys here. Rather than reading the range keys
833 : // from `rangeKeyIter`, which will already be fragmented, read the range
834 : // keys from the batch and copy them verbatim. This marginally improves our
835 : // test coverage over the alternative approach of pre-fragmenting and
836 : // pre-coalescing before writing to the batch.
837 : //
838 : // The `rangeKeyIter` is used only to determine if there are any range keys
839 : // in the batch at all, and only because we already have it handy from
840 : // private.BatchSort.
841 1 : if rangeKeyIter != nil {
842 1 : for r := b.Reader(); ; {
843 1 : kind, key, value, ok, err := r.Next()
844 1 : if !ok {
845 1 : if err != nil {
846 0 : return nil, err
847 0 : }
848 1 : break
849 1 : } else if !rangekey.IsRangeKey(kind) {
850 1 : continue
851 : }
852 1 : ik := base.MakeInternalKey(key, 0, kind)
853 1 : if err := collapsed.AddInternalKey(&ik, value, nil); err != nil {
854 0 : return nil, err
855 0 : }
856 : }
857 1 : rangeKeyIter.Close()
858 1 : rangeKeyIter = nil
859 : }
860 :
861 1 : return collapsed, nil
862 : }
863 :
864 1 : func (o *ingestOp) String() string {
865 1 : var buf strings.Builder
866 1 : buf.WriteString(o.dbID.String())
867 1 : buf.WriteString(".Ingest(")
868 1 : for i, id := range o.batchIDs {
869 1 : if i > 0 {
870 1 : buf.WriteString(", ")
871 1 : }
872 1 : buf.WriteString(id.String())
873 : }
874 1 : buf.WriteString(")")
875 1 : return buf.String()
876 : }
877 :
878 0 : func (o *ingestOp) keys() []*[]byte { return nil }
879 0 : func (o *ingestOp) diagramKeyRanges() []pebble.KeyRange { return nil }
880 :
881 : type ingestAndExciseOp struct {
882 : dbID objID
883 : batchID objID
884 : derivedDBID objID
885 : exciseStart, exciseEnd []byte
886 : }
887 :
888 1 : func (o *ingestAndExciseOp) run(t *Test, h historyRecorder) {
889 1 : var err error
890 1 : b := t.getBatch(o.batchID)
891 1 : t.clearObj(o.batchID)
892 1 : if t.testOpts.Opts.Comparer.Compare(o.exciseEnd, o.exciseStart) <= 0 {
893 0 : panic("non-well-formed excise span")
894 : }
895 1 : db := t.getDB(o.dbID)
896 1 : if b.Empty() {
897 1 : h.Recordf("%s // %v", o, o.simulateExcise(db, t))
898 1 : return
899 1 : }
900 :
901 1 : path, writerMeta, err2 := buildForIngest(t, o.dbID, b, 0 /* i */)
902 1 : if err2 != nil {
903 0 : h.Recordf("Build(%s) // %v", o.batchID, err2)
904 0 : return
905 0 : }
906 1 : err = firstError(err, b.Close())
907 1 :
908 1 : if writerMeta.Properties.NumEntries == 0 && writerMeta.Properties.NumRangeKeys() == 0 {
909 0 : h.Recordf("%s // %v", o, o.simulateExcise(db, t))
910 0 : return
911 0 : }
912 :
913 1 : if t.testOpts.useExcise {
914 1 : err = firstError(err, t.withRetries(func() error {
915 1 : _, err := db.IngestAndExcise(context.Background(), []string{path}, nil /* shared */, nil /* external */, pebble.KeyRange{
916 1 : Start: o.exciseStart,
917 1 : End: o.exciseEnd,
918 1 : })
919 1 : return err
920 1 : }))
921 1 : } else {
922 1 : err = firstError(err, o.simulateExcise(db, t))
923 1 : err = firstError(err, t.withRetries(func() error {
924 1 : return db.Ingest(context.Background(), []string{path})
925 1 : }))
926 : }
927 :
928 1 : h.Recordf("%s // %v", o, err)
929 : }
930 :
931 1 : func (o *ingestAndExciseOp) simulateExcise(db *pebble.DB, t *Test) error {
932 1 : // Simulate the excise using a DeleteRange and RangeKeyDelete.
933 1 : return errors.CombineErrors(
934 1 : db.DeleteRange(o.exciseStart, o.exciseEnd, t.writeOpts),
935 1 : db.RangeKeyDelete(o.exciseStart, o.exciseEnd, t.writeOpts),
936 1 : )
937 1 : }
938 :
939 1 : func (o *ingestAndExciseOp) receiver() objID { return o.dbID }
940 1 : func (o *ingestAndExciseOp) syncObjs() objIDSlice {
941 1 : // Ingest should not be concurrent with mutating the batches that will be
942 1 : // ingested as sstables.
943 1 : objs := []objID{o.batchID}
944 1 : if o.derivedDBID != o.dbID {
945 0 : objs = append(objs, o.derivedDBID)
946 0 : }
947 1 : return objs
948 : }
949 :
950 1 : func (o *ingestAndExciseOp) String() string {
951 1 : return fmt.Sprintf("%s.IngestAndExcise(%s, %q, %q)", o.dbID, o.batchID, o.exciseStart, o.exciseEnd)
952 1 : }
953 :
954 0 : func (o *ingestAndExciseOp) keys() []*[]byte {
955 0 : return []*[]byte{&o.exciseStart, &o.exciseEnd}
956 0 : }
957 :
958 0 : func (o *ingestAndExciseOp) diagramKeyRanges() []pebble.KeyRange {
959 0 : return []pebble.KeyRange{{Start: o.exciseStart, End: o.exciseEnd}}
960 0 : }
961 :
962 : // ingestExternalFilesOp models a DB.IngestExternalFiles operation.
963 : //
964 : // When remote storage is not enabled, the operation is emulated using the
965 : // regular DB.Ingest; this serves as a cross-check of the result.
966 : type ingestExternalFilesOp struct {
967 : dbID objID
968 : // The bounds of the objects cannot overlap.
969 : objs []externalObjWithBounds
970 : }
971 :
972 : type externalObjWithBounds struct {
973 : externalObjID objID
974 :
975 : // bounds for the external object. These bounds apply after keys undergo
976 : // any prefix or suffix transforms.
977 : bounds pebble.KeyRange
978 :
979 : syntheticPrefix sstable.SyntheticPrefix
980 : syntheticSuffix sstable.SyntheticSuffix
981 : }
982 :
983 1 : func (o *ingestExternalFilesOp) run(t *Test, h historyRecorder) {
984 1 : db := t.getDB(o.dbID)
985 1 :
986 1 : // Verify the objects exist (useful for --try-to-reduce).
987 1 : for i := range o.objs {
988 1 : t.getExternalObj(o.objs[i].externalObjID)
989 1 : }
990 :
991 1 : var err error
992 1 : if !t.testOpts.externalStorageEnabled {
993 1 : // Emulate the operation by crating local, truncated SST files and ingesting
994 1 : // them.
995 1 : var paths []string
996 1 : for i, obj := range o.objs {
997 1 : // Make sure the object exists and is not empty.
998 1 : path, sstMeta := buildForIngestExternalEmulation(
999 1 : t, o.dbID, obj.externalObjID, obj.bounds, obj.syntheticSuffix, obj.syntheticPrefix, i,
1000 1 : )
1001 1 : if sstMeta.HasPointKeys || sstMeta.HasRangeKeys || sstMeta.HasRangeDelKeys {
1002 1 : paths = append(paths, path)
1003 1 : }
1004 : }
1005 1 : if len(paths) > 0 {
1006 1 : err = db.Ingest(context.Background(), paths)
1007 1 : }
1008 0 : } else {
1009 0 : external := make([]pebble.ExternalFile, len(o.objs))
1010 0 : for i, obj := range o.objs {
1011 0 : meta := t.getExternalObj(obj.externalObjID)
1012 0 : external[i] = pebble.ExternalFile{
1013 0 : Locator: "external",
1014 0 : ObjName: externalObjName(obj.externalObjID),
1015 0 : Size: meta.sstMeta.Size,
1016 0 : StartKey: obj.bounds.Start,
1017 0 : EndKey: obj.bounds.End,
1018 0 : EndKeyIsInclusive: false,
1019 0 : // Note: if the table has point/range keys, we don't know for sure whether
1020 0 : // this particular range has any, but that's acceptable.
1021 0 : HasPointKey: meta.sstMeta.HasPointKeys || meta.sstMeta.HasRangeDelKeys,
1022 0 : HasRangeKey: meta.sstMeta.HasRangeKeys,
1023 0 : SyntheticSuffix: obj.syntheticSuffix,
1024 0 : }
1025 0 : if obj.syntheticPrefix.IsSet() {
1026 0 : external[i].SyntheticPrefix = obj.syntheticPrefix
1027 0 : }
1028 : }
1029 0 : _, err = db.IngestExternalFiles(context.Background(), external)
1030 : }
1031 :
1032 1 : h.Recordf("%s // %v", o, err)
1033 : }
1034 :
1035 1 : func (o *ingestExternalFilesOp) receiver() objID { return o.dbID }
1036 1 : func (o *ingestExternalFilesOp) syncObjs() objIDSlice {
1037 1 : res := make(objIDSlice, len(o.objs))
1038 1 : for i := range res {
1039 1 : res[i] = o.objs[i].externalObjID
1040 1 : }
1041 : // Deduplicate the IDs.
1042 1 : slices.Sort(res)
1043 1 : return slices.Compact(res)
1044 : }
1045 :
1046 1 : func (o *ingestExternalFilesOp) String() string {
1047 1 : strs := make([]string, len(o.objs))
1048 1 : for i, obj := range o.objs {
1049 1 : strs[i] = fmt.Sprintf("%s, %q /* start */, %q /* end */, %q /* syntheticSuffix */, %q /* syntheticPrefix */",
1050 1 : obj.externalObjID, obj.bounds.Start, obj.bounds.End, obj.syntheticSuffix, obj.syntheticPrefix,
1051 1 : )
1052 1 : }
1053 1 : return fmt.Sprintf("%s.IngestExternalFiles(%s)", o.dbID, strings.Join(strs, ", "))
1054 : }
1055 :
1056 0 : func (o *ingestExternalFilesOp) keys() []*[]byte {
1057 0 : // If any of the objects have synthetic prefixes, we can't allow modification
1058 0 : // of external object bounds.
1059 0 : for i := range o.objs {
1060 0 : if o.objs[i].syntheticPrefix.IsSet() {
1061 0 : return nil
1062 0 : }
1063 : }
1064 :
1065 0 : var res []*[]byte
1066 0 : for i := range o.objs {
1067 0 : res = append(res, &o.objs[i].bounds.Start, &o.objs[i].bounds.End)
1068 0 : }
1069 0 : return res
1070 : }
1071 :
1072 0 : func (o *ingestExternalFilesOp) diagramKeyRanges() []pebble.KeyRange {
1073 0 : ranges := make([]pebble.KeyRange, len(o.objs))
1074 0 : for i, obj := range o.objs {
1075 0 : ranges[i] = obj.bounds
1076 0 : }
1077 0 : return ranges
1078 : }
1079 :
1080 : // getOp models a Reader.Get operation.
1081 : type getOp struct {
1082 : readerID objID
1083 : key []byte
1084 : derivedDBID objID
1085 : }
1086 :
1087 1 : func (o *getOp) run(t *Test, h historyRecorder) {
1088 1 : r := t.getReader(o.readerID)
1089 1 : var val []byte
1090 1 : var closer io.Closer
1091 1 : err := t.withRetries(func() (err error) {
1092 1 : val, closer, err = r.Get(o.key)
1093 1 : return err
1094 1 : })
1095 1 : h.Recordf("%s // [%q] %v", o, val, err)
1096 1 : if closer != nil {
1097 1 : closer.Close()
1098 1 : }
1099 : }
1100 :
1101 1 : func (o *getOp) String() string { return fmt.Sprintf("%s.Get(%q)", o.readerID, o.key) }
1102 1 : func (o *getOp) receiver() objID { return o.readerID }
1103 1 : func (o *getOp) syncObjs() objIDSlice {
1104 1 : if o.readerID.tag() == dbTag {
1105 1 : return nil
1106 1 : }
1107 : // batch.Get reads through to the current database state.
1108 1 : if o.derivedDBID != 0 {
1109 1 : return []objID{o.derivedDBID}
1110 1 : }
1111 0 : return nil
1112 : }
1113 :
1114 0 : func (o *getOp) keys() []*[]byte {
1115 0 : return []*[]byte{&o.key}
1116 0 : }
1117 :
1118 0 : func (o *getOp) diagramKeyRanges() []pebble.KeyRange {
1119 0 : return []pebble.KeyRange{{Start: o.key, End: o.key}}
1120 0 : }
1121 :
1122 : // newIterOp models a Reader.NewIter operation.
1123 : type newIterOp struct {
1124 : readerID objID
1125 : iterID objID
1126 : iterOpts
1127 : derivedDBID objID
1128 : }
1129 :
1130 : // Enable this to enable debug logging of range key iterator operations.
1131 : const debugIterators = false
1132 :
1133 1 : func (o *newIterOp) run(t *Test, h historyRecorder) {
1134 1 : r := t.getReader(o.readerID)
1135 1 : opts := iterOptions(o.iterOpts)
1136 1 : if debugIterators {
1137 0 : opts.DebugRangeKeyStack = true
1138 0 : }
1139 :
1140 1 : var i *pebble.Iterator
1141 1 : for {
1142 1 : i, _ = r.NewIter(opts)
1143 1 : if err := i.Error(); !errors.Is(err, errorfs.ErrInjected) {
1144 1 : break
1145 : }
1146 : // close this iter and retry NewIter
1147 0 : _ = i.Close()
1148 : }
1149 1 : t.setIter(o.iterID, i)
1150 1 :
1151 1 : // Trash the bounds to ensure that Pebble doesn't rely on the stability of
1152 1 : // the user-provided bounds.
1153 1 : if opts != nil {
1154 1 : rand.Read(opts.LowerBound[:])
1155 1 : rand.Read(opts.UpperBound[:])
1156 1 : }
1157 1 : h.Recordf("%s // %v", o, i.Error())
1158 : }
1159 :
1160 1 : func (o *newIterOp) String() string {
1161 1 : return fmt.Sprintf("%s = %s.NewIter(%q, %q, %d /* key types */, %d, %d, %t /* use L6 filters */, %q /* masking suffix */)",
1162 1 : o.iterID, o.readerID, o.lower, o.upper, o.keyTypes, o.filterMin, o.filterMax, o.useL6Filters, o.maskSuffix)
1163 1 : }
1164 :
1165 1 : func (o *newIterOp) receiver() objID { return o.readerID }
1166 1 : func (o *newIterOp) syncObjs() objIDSlice {
1167 1 : // Prevent o.iterID ops from running before it exists.
1168 1 : objs := []objID{o.iterID}
1169 1 : // If reading through a batch or snapshot, the new iterator will also observe database
1170 1 : // state, and we must synchronize on the database state for a consistent
1171 1 : // view.
1172 1 : if o.readerID.tag() == batchTag || o.readerID.tag() == snapTag {
1173 1 : objs = append(objs, o.derivedDBID)
1174 1 : }
1175 1 : return objs
1176 : }
1177 :
1178 0 : func (o *newIterOp) keys() []*[]byte {
1179 0 : var res []*[]byte
1180 0 : if o.lower != nil {
1181 0 : res = append(res, &o.lower)
1182 0 : }
1183 0 : if o.upper != nil {
1184 0 : res = append(res, &o.upper)
1185 0 : }
1186 0 : return res
1187 : }
1188 :
1189 1 : func (o *newIterOp) diagramKeyRanges() []pebble.KeyRange {
1190 1 : var res []pebble.KeyRange
1191 1 : if o.lower != nil {
1192 0 : res = append(res, pebble.KeyRange{Start: o.lower, End: o.lower})
1193 0 : }
1194 1 : if o.upper != nil {
1195 0 : res = append(res, pebble.KeyRange{Start: o.upper, End: o.upper})
1196 0 : }
1197 1 : return res
1198 : }
1199 :
1200 : // newIterUsingCloneOp models a Iterator.Clone operation.
1201 : type newIterUsingCloneOp struct {
1202 : existingIterID objID
1203 : iterID objID
1204 : refreshBatch bool
1205 : iterOpts
1206 :
1207 : // derivedReaderID is the ID of the underlying reader that backs both the
1208 : // existing iterator and the new iterator. The derivedReaderID is NOT
1209 : // serialized by String and is derived from other operations during parse.
1210 : derivedReaderID objID
1211 : }
1212 :
1213 1 : func (o *newIterUsingCloneOp) run(t *Test, h historyRecorder) {
1214 1 : iter := t.getIter(o.existingIterID)
1215 1 : cloneOpts := pebble.CloneOptions{
1216 1 : IterOptions: iterOptions(o.iterOpts),
1217 1 : RefreshBatchView: o.refreshBatch,
1218 1 : }
1219 1 : i, err := iter.iter.Clone(cloneOpts)
1220 1 : if err != nil {
1221 0 : panic(err)
1222 : }
1223 1 : t.setIter(o.iterID, i)
1224 1 : h.Recordf("%s // %v", o, i.Error())
1225 : }
1226 :
1227 1 : func (o *newIterUsingCloneOp) String() string {
1228 1 : return fmt.Sprintf("%s = %s.Clone(%t, %q, %q, %d /* key types */, %d, %d, %t /* use L6 filters */, %q /* masking suffix */)",
1229 1 : o.iterID, o.existingIterID, o.refreshBatch, o.lower, o.upper,
1230 1 : o.keyTypes, o.filterMin, o.filterMax, o.useL6Filters, o.maskSuffix)
1231 1 : }
1232 :
1233 1 : func (o *newIterUsingCloneOp) receiver() objID { return o.existingIterID }
1234 :
1235 1 : func (o *newIterUsingCloneOp) syncObjs() objIDSlice {
1236 1 : objIDs := []objID{o.iterID}
1237 1 : // If the underlying reader is a batch, we must synchronize with the batch.
1238 1 : // If refreshBatch=true, synchronizing is necessary to observe all the
1239 1 : // mutations up to until this op and no more. Even when refreshBatch=false,
1240 1 : // we must synchronize because iterator construction may access state cached
1241 1 : // on the indexed batch to avoid refragmenting range tombstones or range
1242 1 : // keys.
1243 1 : if o.derivedReaderID.tag() == batchTag {
1244 0 : objIDs = append(objIDs, o.derivedReaderID)
1245 0 : }
1246 1 : return objIDs
1247 : }
1248 :
1249 0 : func (o *newIterUsingCloneOp) keys() []*[]byte { return nil }
1250 0 : func (o *newIterUsingCloneOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1251 :
1252 : // iterSetBoundsOp models an Iterator.SetBounds operation.
1253 : type iterSetBoundsOp struct {
1254 : iterID objID
1255 : lower []byte
1256 : upper []byte
1257 : }
1258 :
1259 1 : func (o *iterSetBoundsOp) run(t *Test, h historyRecorder) {
1260 1 : i := t.getIter(o.iterID)
1261 1 : var lower, upper []byte
1262 1 : if o.lower != nil {
1263 1 : lower = append(lower, o.lower...)
1264 1 : }
1265 1 : if o.upper != nil {
1266 1 : upper = append(upper, o.upper...)
1267 1 : }
1268 1 : i.SetBounds(lower, upper)
1269 1 :
1270 1 : // Trash the bounds to ensure that Pebble doesn't rely on the stability of
1271 1 : // the user-provided bounds.
1272 1 : rand.Read(lower[:])
1273 1 : rand.Read(upper[:])
1274 1 :
1275 1 : h.Recordf("%s // %v", o, i.Error())
1276 : }
1277 :
1278 1 : func (o *iterSetBoundsOp) String() string {
1279 1 : return fmt.Sprintf("%s.SetBounds(%q, %q)", o.iterID, o.lower, o.upper)
1280 1 : }
1281 :
1282 1 : func (o *iterSetBoundsOp) receiver() objID { return o.iterID }
1283 1 : func (o *iterSetBoundsOp) syncObjs() objIDSlice { return nil }
1284 :
1285 0 : func (o *iterSetBoundsOp) keys() []*[]byte {
1286 0 : return []*[]byte{&o.lower, &o.upper}
1287 0 : }
1288 :
1289 0 : func (o *iterSetBoundsOp) diagramKeyRanges() []pebble.KeyRange {
1290 0 : return []pebble.KeyRange{{Start: o.lower, End: o.upper}}
1291 0 : }
1292 :
1293 : // iterSetOptionsOp models an Iterator.SetOptions operation.
1294 : type iterSetOptionsOp struct {
1295 : iterID objID
1296 : iterOpts
1297 :
1298 : // derivedReaderID is the ID of the underlying reader that backs the
1299 : // iterator. The derivedReaderID is NOT serialized by String and is derived
1300 : // from other operations during parse.
1301 : derivedReaderID objID
1302 : }
1303 :
1304 1 : func (o *iterSetOptionsOp) run(t *Test, h historyRecorder) {
1305 1 : i := t.getIter(o.iterID)
1306 1 :
1307 1 : opts := iterOptions(o.iterOpts)
1308 1 : if opts == nil {
1309 0 : opts = &pebble.IterOptions{}
1310 0 : }
1311 1 : i.SetOptions(opts)
1312 1 :
1313 1 : // Trash the bounds to ensure that Pebble doesn't rely on the stability of
1314 1 : // the user-provided bounds.
1315 1 : rand.Read(opts.LowerBound[:])
1316 1 : rand.Read(opts.UpperBound[:])
1317 1 :
1318 1 : h.Recordf("%s // %v", o, i.Error())
1319 : }
1320 :
1321 1 : func (o *iterSetOptionsOp) String() string {
1322 1 : return fmt.Sprintf("%s.SetOptions(%q, %q, %d /* key types */, %d, %d, %t /* use L6 filters */, %q /* masking suffix */)",
1323 1 : o.iterID, o.lower, o.upper, o.keyTypes, o.filterMin, o.filterMax, o.useL6Filters, o.maskSuffix)
1324 1 : }
1325 :
1326 1 : func iterOptions(o iterOpts) *pebble.IterOptions {
1327 1 : if o.IsZero() && !debugIterators {
1328 1 : return nil
1329 1 : }
1330 1 : var lower, upper []byte
1331 1 : if o.lower != nil {
1332 1 : lower = append(lower, o.lower...)
1333 1 : }
1334 1 : if o.upper != nil {
1335 1 : upper = append(upper, o.upper...)
1336 1 : }
1337 1 : opts := &pebble.IterOptions{
1338 1 : LowerBound: lower,
1339 1 : UpperBound: upper,
1340 1 : KeyTypes: pebble.IterKeyType(o.keyTypes),
1341 1 : RangeKeyMasking: pebble.RangeKeyMasking{
1342 1 : Suffix: o.maskSuffix,
1343 1 : },
1344 1 : UseL6Filters: o.useL6Filters,
1345 1 : DebugRangeKeyStack: debugIterators,
1346 1 : }
1347 1 : if opts.RangeKeyMasking.Suffix != nil {
1348 1 : opts.RangeKeyMasking.Filter = func() pebble.BlockPropertyFilterMask {
1349 1 : return sstable.NewTestKeysMaskingFilter()
1350 1 : }
1351 : }
1352 1 : if o.filterMax > 0 {
1353 1 : opts.PointKeyFilters = []pebble.BlockPropertyFilter{
1354 1 : sstable.NewTestKeysBlockPropertyFilter(o.filterMin, o.filterMax),
1355 1 : }
1356 1 : // Enforce the timestamp bounds in SkipPoint, so that the iterator never
1357 1 : // returns a key outside the filterMin, filterMax bounds. This provides
1358 1 : // deterministic iteration.
1359 1 : opts.SkipPoint = func(k []byte) (skip bool) {
1360 1 : n := testkeys.Comparer.Split(k)
1361 1 : if n == len(k) {
1362 1 : // No suffix, don't skip it.
1363 1 : return false
1364 1 : }
1365 1 : v, err := testkeys.ParseSuffix(k[n:])
1366 1 : if err != nil {
1367 0 : panic(err)
1368 : }
1369 1 : ts := uint64(v)
1370 1 : return ts < o.filterMin || ts >= o.filterMax
1371 : }
1372 : }
1373 1 : return opts
1374 : }
1375 :
1376 1 : func (o *iterSetOptionsOp) receiver() objID { return o.iterID }
1377 :
1378 1 : func (o *iterSetOptionsOp) syncObjs() objIDSlice {
1379 1 : if o.derivedReaderID.tag() == batchTag {
1380 0 : // If the underlying reader is a batch, we must synchronize with the
1381 0 : // batch so that we observe all the mutations up until this operation
1382 0 : // and no more.
1383 0 : return []objID{o.derivedReaderID}
1384 0 : }
1385 1 : return nil
1386 : }
1387 :
1388 0 : func (o *iterSetOptionsOp) keys() []*[]byte { return nil }
1389 0 : func (o *iterSetOptionsOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1390 :
// iterSeekGEOp models an Iterator.SeekGE[WithLimit] operation.
type iterSeekGEOp struct {
	// iterID identifies the iterator the seek is issued on.
	iterID objID
	// key is the seek key.
	key []byte
	// limit selects the variant: when nil the op runs SeekGE, otherwise it
	// runs SeekGEWithLimit with this limit.
	limit []byte

	// derivedReaderID is the ID of the reader backing the iterator; the op
	// synchronizes with it only when it is a batch.
	derivedReaderID objID
}
1399 :
1400 1 : func iteratorPos(i *retryableIter) string {
1401 1 : var buf bytes.Buffer
1402 1 : fmt.Fprintf(&buf, "%q", i.Key())
1403 1 : hasPoint, hasRange := i.HasPointAndRange()
1404 1 : if hasPoint {
1405 1 : fmt.Fprintf(&buf, ",%q", i.Value())
1406 1 : } else {
1407 1 : fmt.Fprint(&buf, ",<no point>")
1408 1 : }
1409 1 : if hasRange {
1410 1 : start, end := i.RangeBounds()
1411 1 : fmt.Fprintf(&buf, ",[%q,%q)=>{", start, end)
1412 1 : for i, rk := range i.RangeKeys() {
1413 1 : if i > 0 {
1414 1 : fmt.Fprint(&buf, ",")
1415 1 : }
1416 1 : fmt.Fprintf(&buf, "%q=%q", rk.Suffix, rk.Value)
1417 : }
1418 1 : fmt.Fprint(&buf, "}")
1419 1 : } else {
1420 1 : fmt.Fprint(&buf, ",<no range>")
1421 1 : }
1422 1 : if i.RangeKeyChanged() {
1423 1 : fmt.Fprint(&buf, "*")
1424 1 : }
1425 1 : return buf.String()
1426 : }
1427 :
// validBoolToStr returns the textual form of a validity flag: "true" or
// "false".
func validBoolToStr(valid bool) string {
	text := fmt.Sprint(valid)
	return text
}
1431 :
1432 1 : func validityStateToStr(validity pebble.IterValidityState) (bool, string) {
1433 1 : // We can't distinguish between IterExhausted and IterAtLimit in a
1434 1 : // deterministic manner.
1435 1 : switch validity {
1436 1 : case pebble.IterExhausted, pebble.IterAtLimit:
1437 1 : return false, "invalid"
1438 1 : case pebble.IterValid:
1439 1 : return true, "valid"
1440 0 : default:
1441 0 : panic("unknown validity")
1442 : }
1443 : }
1444 :
1445 1 : func (o *iterSeekGEOp) run(t *Test, h historyRecorder) {
1446 1 : i := t.getIter(o.iterID)
1447 1 : var valid bool
1448 1 : var validStr string
1449 1 : if o.limit == nil {
1450 1 : valid = i.SeekGE(o.key)
1451 1 : validStr = validBoolToStr(valid)
1452 1 : } else {
1453 1 : valid, validStr = validityStateToStr(i.SeekGEWithLimit(o.key, o.limit))
1454 1 : }
1455 1 : if valid {
1456 1 : h.Recordf("%s // [%s,%s] %v", o, validStr, iteratorPos(i), i.Error())
1457 1 : } else {
1458 1 : h.Recordf("%s // [%s] %v", o, validStr, i.Error())
1459 1 : }
1460 : }
1461 :
1462 1 : func (o *iterSeekGEOp) String() string {
1463 1 : return fmt.Sprintf("%s.SeekGE(%q, %q)", o.iterID, o.key, o.limit)
1464 1 : }
1465 1 : func (o *iterSeekGEOp) receiver() objID { return o.iterID }
1466 1 : func (o *iterSeekGEOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1467 :
1468 0 : func (o *iterSeekGEOp) keys() []*[]byte {
1469 0 : return []*[]byte{&o.key}
1470 0 : }
1471 :
1472 1 : func (o *iterSeekGEOp) diagramKeyRanges() []pebble.KeyRange {
1473 1 : return []pebble.KeyRange{{Start: o.key, End: o.key}}
1474 1 : }
1475 :
1476 1 : func onlyBatchIDs(ids ...objID) objIDSlice {
1477 1 : var ret objIDSlice
1478 1 : for _, id := range ids {
1479 1 : if id.tag() == batchTag {
1480 0 : ret = append(ret, id)
1481 0 : }
1482 : }
1483 1 : return ret
1484 : }
1485 :
1486 : // iterSeekPrefixGEOp models an Iterator.SeekPrefixGE operation.
1487 : type iterSeekPrefixGEOp struct {
1488 : iterID objID
1489 : key []byte
1490 :
1491 : derivedReaderID objID
1492 : }
1493 :
1494 1 : func (o *iterSeekPrefixGEOp) run(t *Test, h historyRecorder) {
1495 1 : i := t.getIter(o.iterID)
1496 1 : valid := i.SeekPrefixGE(o.key)
1497 1 : if valid {
1498 1 : h.Recordf("%s // [%t,%s] %v", o, valid, iteratorPos(i), i.Error())
1499 1 : } else {
1500 1 : h.Recordf("%s // [%t] %v", o, valid, i.Error())
1501 1 : }
1502 : }
1503 :
1504 1 : func (o *iterSeekPrefixGEOp) String() string {
1505 1 : return fmt.Sprintf("%s.SeekPrefixGE(%q)", o.iterID, o.key)
1506 1 : }
1507 1 : func (o *iterSeekPrefixGEOp) receiver() objID { return o.iterID }
1508 1 : func (o *iterSeekPrefixGEOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1509 :
1510 0 : func (o *iterSeekPrefixGEOp) keys() []*[]byte {
1511 0 : return []*[]byte{&o.key}
1512 0 : }
1513 :
1514 0 : func (o *iterSeekPrefixGEOp) diagramKeyRanges() []pebble.KeyRange {
1515 0 : return []pebble.KeyRange{{Start: o.key, End: o.key}}
1516 0 : }
1517 :
1518 : // iterSeekLTOp models an Iterator.SeekLT[WithLimit] operation.
1519 : type iterSeekLTOp struct {
1520 : iterID objID
1521 : key []byte
1522 : limit []byte
1523 :
1524 : derivedReaderID objID
1525 : }
1526 :
1527 1 : func (o *iterSeekLTOp) run(t *Test, h historyRecorder) {
1528 1 : i := t.getIter(o.iterID)
1529 1 : var valid bool
1530 1 : var validStr string
1531 1 : if o.limit == nil {
1532 1 : valid = i.SeekLT(o.key)
1533 1 : validStr = validBoolToStr(valid)
1534 1 : } else {
1535 1 : valid, validStr = validityStateToStr(i.SeekLTWithLimit(o.key, o.limit))
1536 1 : }
1537 1 : if valid {
1538 1 : h.Recordf("%s // [%s,%s] %v", o, validStr, iteratorPos(i), i.Error())
1539 1 : } else {
1540 1 : h.Recordf("%s // [%s] %v", o, validStr, i.Error())
1541 1 : }
1542 : }
1543 :
1544 1 : func (o *iterSeekLTOp) String() string {
1545 1 : return fmt.Sprintf("%s.SeekLT(%q, %q)", o.iterID, o.key, o.limit)
1546 1 : }
1547 :
1548 1 : func (o *iterSeekLTOp) receiver() objID { return o.iterID }
1549 1 : func (o *iterSeekLTOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1550 :
1551 0 : func (o *iterSeekLTOp) keys() []*[]byte {
1552 0 : return []*[]byte{&o.key}
1553 0 : }
1554 :
1555 0 : func (o *iterSeekLTOp) diagramKeyRanges() []pebble.KeyRange {
1556 0 : return []pebble.KeyRange{{Start: o.key, End: o.key}}
1557 0 : }
1558 :
1559 : // iterFirstOp models an Iterator.First operation.
1560 : type iterFirstOp struct {
1561 : iterID objID
1562 :
1563 : derivedReaderID objID
1564 : }
1565 :
1566 1 : func (o *iterFirstOp) run(t *Test, h historyRecorder) {
1567 1 : i := t.getIter(o.iterID)
1568 1 : valid := i.First()
1569 1 : if valid {
1570 1 : h.Recordf("%s // [%t,%s] %v", o, valid, iteratorPos(i), i.Error())
1571 1 : } else {
1572 1 : h.Recordf("%s // [%t] %v", o, valid, i.Error())
1573 1 : }
1574 : }
1575 :
1576 1 : func (o *iterFirstOp) String() string { return fmt.Sprintf("%s.First()", o.iterID) }
1577 1 : func (o *iterFirstOp) receiver() objID { return o.iterID }
1578 1 : func (o *iterFirstOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1579 :
1580 0 : func (o *iterFirstOp) keys() []*[]byte { return nil }
1581 0 : func (o *iterFirstOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1582 :
1583 : // iterLastOp models an Iterator.Last operation.
1584 : type iterLastOp struct {
1585 : iterID objID
1586 :
1587 : derivedReaderID objID
1588 : }
1589 :
1590 1 : func (o *iterLastOp) run(t *Test, h historyRecorder) {
1591 1 : i := t.getIter(o.iterID)
1592 1 : valid := i.Last()
1593 1 : if valid {
1594 1 : h.Recordf("%s // [%t,%s] %v", o, valid, iteratorPos(i), i.Error())
1595 1 : } else {
1596 1 : h.Recordf("%s // [%t] %v", o, valid, i.Error())
1597 1 : }
1598 : }
1599 :
1600 1 : func (o *iterLastOp) String() string { return fmt.Sprintf("%s.Last()", o.iterID) }
1601 1 : func (o *iterLastOp) receiver() objID { return o.iterID }
1602 1 : func (o *iterLastOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1603 :
1604 0 : func (o *iterLastOp) keys() []*[]byte { return nil }
1605 0 : func (o *iterLastOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1606 :
1607 : // iterNextOp models an Iterator.Next[WithLimit] operation.
1608 : type iterNextOp struct {
1609 : iterID objID
1610 : limit []byte
1611 :
1612 : derivedReaderID objID
1613 : }
1614 :
1615 1 : func (o *iterNextOp) run(t *Test, h historyRecorder) {
1616 1 : i := t.getIter(o.iterID)
1617 1 : var valid bool
1618 1 : var validStr string
1619 1 : if o.limit == nil {
1620 1 : valid = i.Next()
1621 1 : validStr = validBoolToStr(valid)
1622 1 : } else {
1623 1 : valid, validStr = validityStateToStr(i.NextWithLimit(o.limit))
1624 1 : }
1625 1 : if valid {
1626 1 : h.Recordf("%s // [%s,%s] %v", o, validStr, iteratorPos(i), i.Error())
1627 1 : } else {
1628 1 : h.Recordf("%s // [%s] %v", o, validStr, i.Error())
1629 1 : }
1630 : }
1631 :
1632 1 : func (o *iterNextOp) String() string { return fmt.Sprintf("%s.Next(%q)", o.iterID, o.limit) }
1633 1 : func (o *iterNextOp) receiver() objID { return o.iterID }
1634 1 : func (o *iterNextOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1635 :
1636 0 : func (o *iterNextOp) keys() []*[]byte { return nil }
1637 0 : func (o *iterNextOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1638 :
1639 : // iterNextPrefixOp models an Iterator.NextPrefix operation.
1640 : type iterNextPrefixOp struct {
1641 : iterID objID
1642 :
1643 : derivedReaderID objID
1644 : }
1645 :
1646 1 : func (o *iterNextPrefixOp) run(t *Test, h historyRecorder) {
1647 1 : i := t.getIter(o.iterID)
1648 1 : valid := i.NextPrefix()
1649 1 : validStr := validBoolToStr(valid)
1650 1 : if valid {
1651 1 : h.Recordf("%s // [%s,%s] %v", o, validStr, iteratorPos(i), i.Error())
1652 1 : } else {
1653 1 : h.Recordf("%s // [%s] %v", o, validStr, i.Error())
1654 1 : }
1655 : }
1656 :
1657 1 : func (o *iterNextPrefixOp) String() string { return fmt.Sprintf("%s.NextPrefix()", o.iterID) }
1658 1 : func (o *iterNextPrefixOp) receiver() objID { return o.iterID }
1659 1 : func (o *iterNextPrefixOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1660 :
1661 0 : func (o *iterNextPrefixOp) keys() []*[]byte { return nil }
1662 0 : func (o *iterNextPrefixOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1663 :
1664 : // iterCanSingleDelOp models a call to CanDeterministicallySingleDelete with an
1665 : // Iterator.
1666 : type iterCanSingleDelOp struct {
1667 : iterID objID
1668 :
1669 : derivedReaderID objID
1670 : }
1671 :
1672 1 : func (o *iterCanSingleDelOp) run(t *Test, h historyRecorder) {
1673 1 : // TODO(jackson): When we perform error injection, we'll need to rethink
1674 1 : // this.
1675 1 : _, err := pebble.CanDeterministicallySingleDelete(t.getIter(o.iterID).iter)
1676 1 : // The return value of CanDeterministicallySingleDelete is dependent on
1677 1 : // internal LSM state and non-deterministic, so we don't record it.
1678 1 : // Including the operation within the metamorphic test at all helps ensure
1679 1 : // that it does not change the result of any other Iterator operation that
1680 1 : // should be deterministic, regardless of its own outcome.
1681 1 : //
1682 1 : // We still record the value of the error because it's deterministic, at
1683 1 : // least for now. The possible error cases are:
1684 1 : // - The iterator was already in an error state when the operation ran.
1685 1 : // - The operation is deterministically invalid (like using an InternalNext
1686 1 : // to change directions.)
1687 1 : h.Recordf("%s // %v", o, err)
1688 1 : }
1689 :
1690 1 : func (o *iterCanSingleDelOp) String() string { return fmt.Sprintf("%s.InternalNext()", o.iterID) }
1691 1 : func (o *iterCanSingleDelOp) receiver() objID { return o.iterID }
1692 1 : func (o *iterCanSingleDelOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1693 :
1694 0 : func (o *iterCanSingleDelOp) keys() []*[]byte { return nil }
1695 0 : func (o *iterCanSingleDelOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1696 :
1697 : // iterPrevOp models an Iterator.Prev[WithLimit] operation.
1698 : type iterPrevOp struct {
1699 : iterID objID
1700 : limit []byte
1701 :
1702 : derivedReaderID objID
1703 : }
1704 :
1705 1 : func (o *iterPrevOp) run(t *Test, h historyRecorder) {
1706 1 : i := t.getIter(o.iterID)
1707 1 : var valid bool
1708 1 : var validStr string
1709 1 : if o.limit == nil {
1710 1 : valid = i.Prev()
1711 1 : validStr = validBoolToStr(valid)
1712 1 : } else {
1713 1 : valid, validStr = validityStateToStr(i.PrevWithLimit(o.limit))
1714 1 : }
1715 1 : if valid {
1716 1 : h.Recordf("%s // [%s,%s] %v", o, validStr, iteratorPos(i), i.Error())
1717 1 : } else {
1718 1 : h.Recordf("%s // [%s] %v", o, validStr, i.Error())
1719 1 : }
1720 : }
1721 :
1722 1 : func (o *iterPrevOp) String() string { return fmt.Sprintf("%s.Prev(%q)", o.iterID, o.limit) }
1723 1 : func (o *iterPrevOp) receiver() objID { return o.iterID }
1724 1 : func (o *iterPrevOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1725 :
1726 0 : func (o *iterPrevOp) keys() []*[]byte { return nil }
1727 0 : func (o *iterPrevOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1728 :
1729 : // newSnapshotOp models a DB.NewSnapshot operation.
1730 : type newSnapshotOp struct {
1731 : dbID objID
1732 : snapID objID
1733 : // If nonempty, this snapshot must not be used to read any keys outside of
1734 : // the provided bounds. This allows some implementations to use 'Eventually
1735 : // file-only snapshots,' which require bounds.
1736 : bounds []pebble.KeyRange
1737 : }
1738 :
1739 1 : func (o *newSnapshotOp) run(t *Test, h historyRecorder) {
1740 1 : bounds := o.bounds
1741 1 : if len(bounds) == 0 {
1742 0 : panic("bounds unexpectedly unset for newSnapshotOp")
1743 : }
1744 : // Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
1745 1 : createEfos := ((11400714819323198485 * uint64(t.idx) * t.testOpts.seedEFOS) >> 63) == 1
1746 1 : // If either of these options is true, an EFOS _must_ be created, regardless
1747 1 : // of what the fibonacci hash returned.
1748 1 : excisePossible := t.testOpts.useSharedReplicate || t.testOpts.useExternalReplicate || t.testOpts.useExcise
1749 1 : if createEfos || excisePossible {
1750 1 : s := t.getDB(o.dbID).NewEventuallyFileOnlySnapshot(bounds)
1751 1 : t.setSnapshot(o.snapID, s)
1752 1 : } else {
1753 1 : s := t.getDB(o.dbID).NewSnapshot()
1754 1 : t.setSnapshot(o.snapID, s)
1755 1 : }
1756 1 : h.Recordf("%s", o)
1757 : }
1758 :
1759 1 : func (o *newSnapshotOp) String() string {
1760 1 : var buf bytes.Buffer
1761 1 : fmt.Fprintf(&buf, "%s = %s.NewSnapshot(", o.snapID, o.dbID)
1762 1 : for i := range o.bounds {
1763 1 : if i > 0 {
1764 1 : fmt.Fprint(&buf, ", ")
1765 1 : }
1766 1 : fmt.Fprintf(&buf, "%q, %q", o.bounds[i].Start, o.bounds[i].End)
1767 : }
1768 1 : fmt.Fprint(&buf, ")")
1769 1 : return buf.String()
1770 : }
1771 1 : func (o *newSnapshotOp) receiver() objID { return o.dbID }
1772 1 : func (o *newSnapshotOp) syncObjs() objIDSlice { return []objID{o.snapID} }
1773 :
1774 1 : func (o *newSnapshotOp) keys() []*[]byte {
1775 1 : var res []*[]byte
1776 1 : for i := range o.bounds {
1777 1 : res = append(res, &o.bounds[i].Start, &o.bounds[i].End)
1778 1 : }
1779 1 : return res
1780 : }
1781 :
1782 1 : func (o *newSnapshotOp) diagramKeyRanges() []pebble.KeyRange {
1783 1 : return o.bounds
1784 1 : }
1785 :
1786 : // newExternalObjOp models a DB.NewExternalObj operation.
1787 : type newExternalObjOp struct {
1788 : batchID objID
1789 : externalObjID objID
1790 : }
1791 :
1792 1 : func externalObjName(externalObjID objID) string {
1793 1 : if externalObjID.tag() != externalObjTag {
1794 0 : panic(fmt.Sprintf("invalid externalObjID %s", externalObjID))
1795 : }
1796 1 : return fmt.Sprintf("external-for-ingest-%d.sst", externalObjID.slot())
1797 : }
1798 :
1799 1 : func (o *newExternalObjOp) run(t *Test, h historyRecorder) {
1800 1 : b := t.getBatch(o.batchID)
1801 1 : t.clearObj(o.batchID)
1802 1 :
1803 1 : writeCloser, err := t.externalStorage.CreateObject(externalObjName(o.externalObjID))
1804 1 : if err != nil {
1805 0 : panic(err)
1806 : }
1807 1 : writable := objstorageprovider.NewRemoteWritable(writeCloser)
1808 1 :
1809 1 : iter, rangeDelIter, rangeKeyIter := private.BatchSort(b)
1810 1 :
1811 1 : sstMeta, err := writeSSTForIngestion(
1812 1 : t,
1813 1 : iter, rangeDelIter, rangeKeyIter,
1814 1 : true, /* uniquePrefixes */
1815 1 : nil, /* syntheticSuffix */
1816 1 : nil, /* syntheticPrefix */
1817 1 : writable,
1818 1 : t.minFMV(),
1819 1 : )
1820 1 : if err != nil {
1821 0 : panic(err)
1822 : }
1823 1 : if !sstMeta.HasPointKeys && !sstMeta.HasRangeDelKeys && !sstMeta.HasRangeKeys {
1824 0 : // This can occur when using --try-to-reduce.
1825 0 : panic("metamorphic test internal error: external object empty")
1826 : }
1827 1 : t.setExternalObj(o.externalObjID, externalObjMeta{
1828 1 : sstMeta: sstMeta,
1829 1 : })
1830 1 : h.Recordf("%s", o)
1831 : }
1832 :
1833 1 : func (o *newExternalObjOp) String() string {
1834 1 : return fmt.Sprintf("%s = %s.NewExternalObj()", o.externalObjID, o.batchID)
1835 1 : }
1836 1 : func (o *newExternalObjOp) receiver() objID { return o.batchID }
1837 1 : func (o *newExternalObjOp) syncObjs() objIDSlice { return []objID{o.externalObjID} }
1838 :
1839 0 : func (o *newExternalObjOp) keys() []*[]byte { return nil }
1840 0 : func (o *newExternalObjOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1841 :
1842 : type dbRatchetFormatMajorVersionOp struct {
1843 : dbID objID
1844 : vers pebble.FormatMajorVersion
1845 : }
1846 :
1847 1 : func (o *dbRatchetFormatMajorVersionOp) run(t *Test, h historyRecorder) {
1848 1 : var err error
1849 1 : // NB: We no-op the operation if we're already at or above the provided
1850 1 : // format major version. Different runs start at different format major
1851 1 : // versions, making the presence of an error and the error message itself
1852 1 : // non-deterministic if we attempt to upgrade to an older version.
1853 1 : //
1854 1 : //Regardless, subsequent operations should behave identically, which is what
1855 1 : //we're really aiming to test by including this format major version ratchet
1856 1 : //operation.
1857 1 : if t.getDB(o.dbID).FormatMajorVersion() < o.vers {
1858 1 : err = t.getDB(o.dbID).RatchetFormatMajorVersion(o.vers)
1859 1 : }
1860 1 : h.Recordf("%s // %v", o, err)
1861 : }
1862 :
1863 1 : func (o *dbRatchetFormatMajorVersionOp) String() string {
1864 1 : return fmt.Sprintf("%s.RatchetFormatMajorVersion(%s)", o.dbID, o.vers)
1865 1 : }
1866 1 : func (o *dbRatchetFormatMajorVersionOp) receiver() objID { return o.dbID }
1867 1 : func (o *dbRatchetFormatMajorVersionOp) syncObjs() objIDSlice { return nil }
1868 :
1869 0 : func (o *dbRatchetFormatMajorVersionOp) keys() []*[]byte { return nil }
1870 0 : func (o *dbRatchetFormatMajorVersionOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1871 :
1872 : type dbRestartOp struct {
1873 : dbID objID
1874 :
1875 : // affectedObjects is the list of additional objects that are affected by this
1876 : // operation, and which syncObjs() must return so that we don't perform the
1877 : // restart in parallel with other operations to affected objects.
1878 : affectedObjects []objID
1879 : }
1880 :
1881 1 : func (o *dbRestartOp) run(t *Test, h historyRecorder) {
1882 1 : if err := t.restartDB(o.dbID); err != nil {
1883 0 : h.Recordf("%s // %v", o, err)
1884 0 : h.history.err.Store(errors.Wrap(err, "dbRestartOp"))
1885 1 : } else {
1886 1 : h.Recordf("%s", o)
1887 1 : }
1888 : }
1889 :
1890 1 : func (o *dbRestartOp) String() string { return fmt.Sprintf("%s.Restart()", o.dbID) }
1891 1 : func (o *dbRestartOp) receiver() objID { return o.dbID }
1892 1 : func (o *dbRestartOp) syncObjs() objIDSlice { return o.affectedObjects }
1893 :
1894 0 : func (o *dbRestartOp) keys() []*[]byte { return nil }
1895 0 : func (o *dbRestartOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1896 :
1897 1 : func formatOps(ops []op) string {
1898 1 : var buf strings.Builder
1899 1 : for _, op := range ops {
1900 1 : fmt.Fprintf(&buf, "%s\n", op)
1901 1 : }
1902 1 : return buf.String()
1903 : }
1904 :
1905 : // replicateOp models an operation that could copy keys from one db to
1906 : // another through either an IngestAndExcise, or an Ingest.
1907 : type replicateOp struct {
1908 : source, dest objID
1909 : start, end []byte
1910 : }
1911 :
1912 : func (r *replicateOp) runSharedReplicate(
1913 : t *Test, h historyRecorder, source, dest *pebble.DB, w *sstable.Writer, sstPath string,
1914 0 : ) {
1915 0 : var sharedSSTs []pebble.SharedSSTMeta
1916 0 : var err error
1917 0 : err = source.ScanInternal(context.TODO(), sstable.CategoryAndQoS{}, r.start, r.end,
1918 0 : func(key *pebble.InternalKey, value pebble.LazyValue, _ pebble.IteratorLevel) error {
1919 0 : val, _, err := value.Value(nil)
1920 0 : if err != nil {
1921 0 : panic(err)
1922 : }
1923 0 : return w.Raw().AddWithForceObsolete(base.MakeInternalKey(key.UserKey, 0, key.Kind()), val, false)
1924 : },
1925 0 : func(start, end []byte, seqNum base.SeqNum) error {
1926 0 : return w.DeleteRange(start, end)
1927 0 : },
1928 0 : func(start, end []byte, keys []keyspan.Key) error {
1929 0 : return w.Raw().EncodeSpan(keyspan.Span{
1930 0 : Start: start,
1931 0 : End: end,
1932 0 : Keys: keys,
1933 0 : })
1934 0 : },
1935 0 : func(sst *pebble.SharedSSTMeta) error {
1936 0 : sharedSSTs = append(sharedSSTs, *sst)
1937 0 : return nil
1938 0 : },
1939 : nil,
1940 : )
1941 0 : if err != nil {
1942 0 : h.Recordf("%s // %v", r, err)
1943 0 : return
1944 0 : }
1945 :
1946 0 : err = w.Close()
1947 0 : if err != nil {
1948 0 : h.Recordf("%s // %v", r, err)
1949 0 : return
1950 0 : }
1951 0 : meta, err := w.Raw().Metadata()
1952 0 : if err != nil {
1953 0 : h.Recordf("%s // %v", r, err)
1954 0 : return
1955 0 : }
1956 0 : if len(sharedSSTs) == 0 && meta.Properties.NumEntries == 0 && meta.Properties.NumRangeKeys() == 0 {
1957 0 : // IngestAndExcise below will be a no-op. We should do a
1958 0 : // DeleteRange+RangeKeyDel to mimic the behaviour of the non-shared-replicate
1959 0 : // case.
1960 0 : //
1961 0 : // TODO(bilal): Remove this when we support excises with no matching ingests.
1962 0 : if err := dest.RangeKeyDelete(r.start, r.end, t.writeOpts); err != nil {
1963 0 : h.Recordf("%s // %v", r, err)
1964 0 : return
1965 0 : }
1966 0 : err := dest.DeleteRange(r.start, r.end, t.writeOpts)
1967 0 : h.Recordf("%s // %v", r, err)
1968 0 : return
1969 : }
1970 :
1971 0 : _, err = dest.IngestAndExcise(context.Background(), []string{sstPath}, sharedSSTs, nil /* external */, pebble.KeyRange{Start: r.start, End: r.end})
1972 0 : h.Recordf("%s // %v", r, err)
1973 : }
1974 :
1975 : func (r *replicateOp) runExternalReplicate(
1976 : t *Test, h historyRecorder, source, dest *pebble.DB, w *sstable.Writer, sstPath string,
1977 0 : ) {
1978 0 : var externalSSTs []pebble.ExternalFile
1979 0 : var err error
1980 0 : err = source.ScanInternal(context.TODO(), sstable.CategoryAndQoS{}, r.start, r.end,
1981 0 : func(key *pebble.InternalKey, value pebble.LazyValue, _ pebble.IteratorLevel) error {
1982 0 : val, _, err := value.Value(nil)
1983 0 : if err != nil {
1984 0 : panic(err)
1985 : }
1986 0 : return w.Raw().AddWithForceObsolete(base.MakeInternalKey(key.UserKey, 0, key.Kind()), val, false)
1987 : },
1988 0 : func(start, end []byte, seqNum base.SeqNum) error {
1989 0 : return w.DeleteRange(start, end)
1990 0 : },
1991 0 : func(start, end []byte, keys []keyspan.Key) error {
1992 0 : return w.Raw().EncodeSpan(keyspan.Span{
1993 0 : Start: start,
1994 0 : End: end,
1995 0 : Keys: keys,
1996 0 : })
1997 0 : },
1998 : nil,
1999 0 : func(sst *pebble.ExternalFile) error {
2000 0 : externalSSTs = append(externalSSTs, *sst)
2001 0 : return nil
2002 0 : },
2003 : )
2004 0 : if err != nil {
2005 0 : h.Recordf("%s // %v", r, err)
2006 0 : return
2007 0 : }
2008 :
2009 0 : err = w.Close()
2010 0 : if err != nil {
2011 0 : h.Recordf("%s // %v", r, err)
2012 0 : return
2013 0 : }
2014 0 : meta, err := w.Raw().Metadata()
2015 0 : if err != nil {
2016 0 : h.Recordf("%s // %v", r, err)
2017 0 : return
2018 0 : }
2019 0 : if len(externalSSTs) == 0 && meta.Properties.NumEntries == 0 && meta.Properties.NumRangeKeys() == 0 {
2020 0 : // IngestAndExcise below will be a no-op. We should do a
2021 0 : // DeleteRange+RangeKeyDel to mimic the behaviour of the non-external-replicate
2022 0 : // case.
2023 0 : //
2024 0 : // TODO(bilal): Remove this when we support excises with no matching ingests.
2025 0 : if err := dest.RangeKeyDelete(r.start, r.end, t.writeOpts); err != nil {
2026 0 : h.Recordf("%s // %v", r, err)
2027 0 : return
2028 0 : }
2029 0 : err := dest.DeleteRange(r.start, r.end, t.writeOpts)
2030 0 : h.Recordf("%s // %v", r, err)
2031 0 : return
2032 : }
2033 :
2034 0 : _, err = dest.IngestAndExcise(context.Background(), []string{sstPath}, nil, externalSSTs /* external */, pebble.KeyRange{Start: r.start, End: r.end})
2035 0 : h.Recordf("%s // %v", r, err)
2036 : }
2037 :
2038 0 : func (r *replicateOp) run(t *Test, h historyRecorder) {
2039 0 : // Shared replication only works if shared storage is enabled.
2040 0 : useSharedIngest := t.testOpts.useSharedReplicate && t.testOpts.sharedStorageEnabled
2041 0 : useExternalIngest := t.testOpts.useExternalReplicate && t.testOpts.externalStorageEnabled
2042 0 :
2043 0 : source := t.getDB(r.source)
2044 0 : dest := t.getDB(r.dest)
2045 0 : sstPath := path.Join(t.tmpDir, fmt.Sprintf("ext-replicate%d.sst", t.idx))
2046 0 : f, err := t.opts.FS.Create(sstPath, vfs.WriteCategoryUnspecified)
2047 0 : if err != nil {
2048 0 : h.Recordf("%s // %v", r, err)
2049 0 : return
2050 0 : }
2051 0 : w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), t.opts.MakeWriterOptions(0, dest.FormatMajorVersion().MaxTableFormat()))
2052 0 :
2053 0 : // NB: In practice we'll either do shared replicate or external replicate,
2054 0 : // as ScanInternal does not support both. We arbitrarily choose to prioritize
2055 0 : // external replication if both are enabled, as those are likely to hit
2056 0 : // widespread usage first.
2057 0 : if useExternalIngest {
2058 0 : r.runExternalReplicate(t, h, source, dest, w, sstPath)
2059 0 : return
2060 0 : }
2061 0 : if useSharedIngest {
2062 0 : r.runSharedReplicate(t, h, source, dest, w, sstPath)
2063 0 : return
2064 0 : }
2065 :
2066 : // First, do a RangeKeyDelete and DeleteRange on the whole span.
2067 0 : if err := dest.RangeKeyDelete(r.start, r.end, t.writeOpts); err != nil {
2068 0 : h.Recordf("%s // %v", r, err)
2069 0 : return
2070 0 : }
2071 0 : if err := dest.DeleteRange(r.start, r.end, t.writeOpts); err != nil {
2072 0 : h.Recordf("%s // %v", r, err)
2073 0 : return
2074 0 : }
2075 0 : iter, err := source.NewIter(&pebble.IterOptions{
2076 0 : LowerBound: r.start,
2077 0 : UpperBound: r.end,
2078 0 : KeyTypes: pebble.IterKeyTypePointsAndRanges,
2079 0 : })
2080 0 : if err != nil {
2081 0 : panic(err)
2082 : }
2083 0 : defer iter.Close()
2084 0 :
2085 0 : for ok := iter.SeekGE(r.start); ok && iter.Error() == nil; ok = iter.Next() {
2086 0 : hasPoint, hasRange := iter.HasPointAndRange()
2087 0 : if hasPoint {
2088 0 : val, err := iter.ValueAndErr()
2089 0 : if err != nil {
2090 0 : panic(err)
2091 : }
2092 0 : if err := w.Set(iter.Key(), val); err != nil {
2093 0 : panic(err)
2094 : }
2095 : }
2096 0 : if hasRange && iter.RangeKeyChanged() {
2097 0 : rangeKeys := iter.RangeKeys()
2098 0 : rkStart, rkEnd := iter.RangeBounds()
2099 0 :
2100 0 : span := keyspan.Span{Start: rkStart, End: rkEnd, Keys: make([]keyspan.Key, len(rangeKeys))}
2101 0 : for i := range rangeKeys {
2102 0 : span.Keys[i] = keyspan.Key{
2103 0 : Trailer: base.MakeTrailer(0, base.InternalKeyKindRangeKeySet),
2104 0 : Suffix: rangeKeys[i].Suffix,
2105 0 : Value: rangeKeys[i].Value,
2106 0 : }
2107 0 : }
2108 0 : keyspan.SortKeysByTrailer(span.Keys)
2109 0 : if err := w.Raw().EncodeSpan(span); err != nil {
2110 0 : panic(err)
2111 : }
2112 : }
2113 : }
2114 0 : if err := iter.Error(); err != nil {
2115 0 : h.Recordf("%s // %v", r, err)
2116 0 : return
2117 0 : }
2118 0 : if err := w.Close(); err != nil {
2119 0 : panic(err)
2120 : }
2121 :
2122 0 : err = dest.Ingest(context.Background(), []string{sstPath})
2123 0 : h.Recordf("%s // %v", r, err)
2124 : }
2125 :
2126 1 : func (r *replicateOp) String() string {
2127 1 : return fmt.Sprintf("%s.Replicate(%s, %q, %q)", r.source, r.dest, r.start, r.end)
2128 1 : }
2129 :
2130 0 : func (r *replicateOp) receiver() objID { return r.source }
2131 0 : func (r *replicateOp) syncObjs() objIDSlice { return objIDSlice{r.dest} }
2132 :
2133 0 : func (r *replicateOp) keys() []*[]byte {
2134 0 : return []*[]byte{&r.start, &r.end}
2135 0 : }
2136 :
2137 1 : func (r *replicateOp) diagramKeyRanges() []pebble.KeyRange {
2138 1 : return []pebble.KeyRange{{Start: r.start, End: r.end}}
2139 1 : }
|