Line data Source code
1 : // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package metamorphic
6 :
7 : import (
8 : "bytes"
9 : "context"
10 : "crypto/rand"
11 : "encoding/binary"
12 : "fmt"
13 : "io"
14 : "path"
15 : "path/filepath"
16 : "slices"
17 : "strings"
18 :
19 : "github.com/cockroachdb/errors"
20 : "github.com/cockroachdb/pebble"
21 : "github.com/cockroachdb/pebble/internal/base"
22 : "github.com/cockroachdb/pebble/internal/keyspan"
23 : "github.com/cockroachdb/pebble/internal/private"
24 : "github.com/cockroachdb/pebble/internal/rangekey"
25 : "github.com/cockroachdb/pebble/internal/testkeys"
26 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
27 : "github.com/cockroachdb/pebble/sstable"
28 : "github.com/cockroachdb/pebble/vfs"
29 : "github.com/cockroachdb/pebble/vfs/errorfs"
30 : )
31 :
32 : // Ops holds a sequence of operations to be executed by the metamorphic tests.
33 : type Ops []op
34 :
35 : // op defines the interface for a single operation, such as creating a batch,
36 : // or advancing an iterator.
37 : type op interface {
38 : String() string
39 :
40 : run(t *Test, h historyRecorder)
41 :
42 : // receiver returns the object ID of the object the operation is performed
43 : // on. Every operation has a receiver (eg, batch0.Set(...) has `batch0` as
44 : // its receiver). Receivers are used for synchronization when running with
45 : // concurrency.
46 : receiver() objID
47 :
48 : // syncObjs returns an additional set of object IDs—excluding the
49 : // receiver—that the operation must synchronize with. At execution time,
50 : // the operation will run serially with respect to all other operations
51 : // that return these objects from their own syncObjs or receiver methods.
52 : syncObjs() objIDSlice
53 :
54 : // keys returns all user keys used by the operation, as pointers to slices.
55 : // The caller can then modify these slices to rewrite the keys.
56 : //
57 : // Used for simplification of operations for easier investigations.
58 : keys() []*[]byte
59 :
60 : // diagramKeyRanges() returns key spans associated with this operation, to be
61 : // shown on an ASCII diagram of operations.
62 : diagramKeyRanges() []pebble.KeyRange
63 : }
64 :
65 : // initOp performs test initialization
66 : type initOp struct {
67 : dbSlots uint32
68 : batchSlots uint32
69 : iterSlots uint32
70 : snapshotSlots uint32
71 : externalObjSlots uint32
72 : }
73 :
74 1 : func (o *initOp) run(t *Test, h historyRecorder) {
75 1 : t.batches = make([]*pebble.Batch, o.batchSlots)
76 1 : t.iters = make([]*retryableIter, o.iterSlots)
77 1 : t.snapshots = make([]readerCloser, o.snapshotSlots)
78 1 : t.externalObjs = make([]externalObjMeta, o.externalObjSlots)
79 1 : h.Recordf("%s", o)
80 1 : }
81 :
82 1 : func (o *initOp) String() string {
83 1 : return fmt.Sprintf("Init(%d /* dbs */, %d /* batches */, %d /* iters */, %d /* snapshots */, %d /* externalObjs */)",
84 1 : o.dbSlots, o.batchSlots, o.iterSlots, o.snapshotSlots, o.externalObjSlots)
85 1 : }
86 :
87 1 : func (o *initOp) receiver() objID { return makeObjID(dbTag, 1) }
88 1 : func (o *initOp) syncObjs() objIDSlice {
89 1 : syncObjs := make([]objID, 0)
90 1 : // Add any additional DBs to syncObjs.
91 1 : for i := uint32(2); i < o.dbSlots+1; i++ {
92 0 : syncObjs = append(syncObjs, makeObjID(dbTag, i))
93 0 : }
94 1 : return syncObjs
95 : }
96 :
97 0 : func (o *initOp) keys() []*[]byte { return nil }
98 1 : func (o *initOp) diagramKeyRanges() []pebble.KeyRange { return nil }
99 :
100 : // applyOp models a Writer.Apply operation.
101 : type applyOp struct {
102 : writerID objID
103 : batchID objID
104 : }
105 :
106 1 : func (o *applyOp) run(t *Test, h historyRecorder) {
107 1 : b := t.getBatch(o.batchID)
108 1 : w := t.getWriter(o.writerID)
109 1 : var err error
110 1 : if o.writerID.tag() == dbTag && t.testOpts.asyncApplyToDB && t.writeOpts.Sync {
111 0 : err = w.(*pebble.DB).ApplyNoSyncWait(b, t.writeOpts)
112 0 : if err == nil {
113 0 : err = b.SyncWait()
114 0 : }
115 1 : } else {
116 1 : err = w.Apply(b, t.writeOpts)
117 1 : }
118 1 : h.Recordf("%s // %v", o, err)
119 : // batch will be closed by a closeOp which is guaranteed to be generated
120 : }
121 :
122 1 : func (o *applyOp) String() string { return fmt.Sprintf("%s.Apply(%s)", o.writerID, o.batchID) }
123 1 : func (o *applyOp) receiver() objID { return o.writerID }
124 1 : func (o *applyOp) syncObjs() objIDSlice {
125 1 : // Apply should not be concurrent with operations that are mutating the
126 1 : // batch.
127 1 : return []objID{o.batchID}
128 1 : }
129 :
130 0 : func (o *applyOp) keys() []*[]byte { return nil }
131 0 : func (o *applyOp) diagramKeyRanges() []pebble.KeyRange { return nil }
132 :
133 : // checkpointOp models a DB.Checkpoint operation.
134 : type checkpointOp struct {
135 : dbID objID
136 : // If non-empty, the checkpoint is restricted to these spans.
137 : spans []pebble.CheckpointSpan
138 : }
139 :
140 1 : func (o *checkpointOp) run(t *Test, h historyRecorder) {
141 1 : // TODO(josh): db.Checkpoint does not work with shared storage yet.
142 1 : // It would be better to filter out ahead of calling run on the op,
143 1 : // by setting the weight that generator.go uses to zero, or similar.
144 1 : // But IIUC the ops are shared for ALL the metamorphic test runs, so
145 1 : // not sure how to do that easily:
146 1 : // https://github.com/cockroachdb/pebble/blob/master/metamorphic/meta.go#L177
147 1 : if t.testOpts.sharedStorageEnabled || t.testOpts.externalStorageEnabled {
148 1 : h.Recordf("%s // %v", o, nil)
149 1 : return
150 1 : }
151 1 : var opts []pebble.CheckpointOption
152 1 : if len(o.spans) > 0 {
153 1 : opts = append(opts, pebble.WithRestrictToSpans(o.spans))
154 1 : }
155 1 : db := t.getDB(o.dbID)
156 1 : err := t.withRetries(func() error {
157 1 : return db.Checkpoint(o.dir(t.dir, h.op), opts...)
158 1 : })
159 1 : h.Recordf("%s // %v", o, err)
160 : }
161 :
162 1 : func (o *checkpointOp) dir(dataDir string, idx int) string {
163 1 : return filepath.Join(dataDir, "checkpoints", fmt.Sprintf("op-%06d", idx))
164 1 : }
165 :
166 1 : func (o *checkpointOp) String() string {
167 1 : var spanStr bytes.Buffer
168 1 : for i, span := range o.spans {
169 1 : if i > 0 {
170 1 : spanStr.WriteString(",")
171 1 : }
172 1 : fmt.Fprintf(&spanStr, "%q,%q", span.Start, span.End)
173 : }
174 1 : return fmt.Sprintf("%s.Checkpoint(%s)", o.dbID, spanStr.String())
175 : }
176 :
177 1 : func (o *checkpointOp) receiver() objID { return o.dbID }
178 1 : func (o *checkpointOp) syncObjs() objIDSlice { return nil }
179 :
180 0 : func (o *checkpointOp) keys() []*[]byte {
181 0 : var res []*[]byte
182 0 : for i := range o.spans {
183 0 : res = append(res, &o.spans[i].Start, &o.spans[i].End)
184 0 : }
185 0 : return res
186 : }
187 :
188 0 : func (o *checkpointOp) diagramKeyRanges() []pebble.KeyRange {
189 0 : var res []pebble.KeyRange
190 0 : for i := range o.spans {
191 0 : res = append(res, pebble.KeyRange{
192 0 : Start: o.spans[i].Start,
193 0 : End: o.spans[i].End,
194 0 : })
195 0 : }
196 0 : return res
197 : }
198 :
199 : // downloadOp models a DB.Download operation.
200 : type downloadOp struct {
201 : dbID objID
202 : spans []pebble.DownloadSpan
203 : }
204 :
205 1 : func (o *downloadOp) run(t *Test, h historyRecorder) {
206 1 : db := t.getDB(o.dbID)
207 1 : err := t.withRetries(func() error {
208 1 : return db.Download(context.Background(), o.spans)
209 1 : })
210 1 : h.Recordf("%s // %v", o, err)
211 : }
212 :
213 1 : func (o *downloadOp) String() string {
214 1 : var spanStr bytes.Buffer
215 1 : for i, span := range o.spans {
216 1 : if i > 0 {
217 1 : spanStr.WriteString(", ")
218 1 : }
219 1 : fmt.Fprintf(&spanStr, "%q /* start */, %q /* end */, %v /* viaBackingFileDownload */",
220 1 : span.StartKey, span.EndKey, span.ViaBackingFileDownload)
221 : }
222 1 : return fmt.Sprintf("%s.Download(%s)", o.dbID, spanStr.String())
223 : }
224 :
225 1 : func (o *downloadOp) receiver() objID { return o.dbID }
226 1 : func (o downloadOp) syncObjs() objIDSlice { return nil }
227 :
228 0 : func (o *downloadOp) keys() []*[]byte {
229 0 : var res []*[]byte
230 0 : for i := range o.spans {
231 0 : res = append(res, &o.spans[i].StartKey, &o.spans[i].EndKey)
232 0 : }
233 0 : return res
234 : }
235 :
236 0 : func (o *downloadOp) diagramKeyRanges() []pebble.KeyRange {
237 0 : var res []pebble.KeyRange
238 0 : for i := range o.spans {
239 0 : res = append(res, pebble.KeyRange{
240 0 : Start: o.spans[i].StartKey,
241 0 : End: o.spans[i].EndKey,
242 0 : })
243 0 : }
244 0 : return res
245 : }
246 :
247 : // closeOp models a {Batch,Iterator,Snapshot}.Close operation.
248 : type closeOp struct {
249 : objID objID
250 :
251 : // affectedObjects is the list of additional objects that are affected by this
252 : // operation, and which syncObjs() must return so that we don't perform the
253 : // close in parallel with other operations to affected objects.
254 : affectedObjects []objID
255 : }
256 :
257 1 : func (o *closeOp) run(t *Test, h historyRecorder) {
258 1 : c := t.getCloser(o.objID)
259 1 : if o.objID.tag() == dbTag && t.opts.DisableWAL {
260 1 : // Special case: If WAL is disabled, do a flush right before DB Close. This
261 1 : // allows us to reuse this run's data directory as initial state for
262 1 : // future runs without losing any mutations.
263 1 : _ = t.getDB(o.objID).Flush()
264 1 : }
265 1 : t.clearObj(o.objID)
266 1 : err := c.Close()
267 1 : h.Recordf("%s // %v", o, err)
268 : }
269 :
270 1 : func (o *closeOp) String() string { return fmt.Sprintf("%s.Close()", o.objID) }
271 1 : func (o *closeOp) receiver() objID { return o.objID }
272 1 : func (o *closeOp) syncObjs() objIDSlice {
273 1 : return o.affectedObjects
274 1 : }
275 :
276 0 : func (o *closeOp) keys() []*[]byte { return nil }
277 0 : func (o *closeOp) diagramKeyRanges() []pebble.KeyRange { return nil }
278 :
279 : // compactOp models a DB.Compact operation.
280 : type compactOp struct {
281 : dbID objID
282 : start []byte
283 : end []byte
284 : parallelize bool
285 : }
286 :
287 1 : func (o *compactOp) run(t *Test, h historyRecorder) {
288 1 : err := t.withRetries(func() error {
289 1 : return t.getDB(o.dbID).Compact(o.start, o.end, o.parallelize)
290 1 : })
291 1 : h.Recordf("%s // %v", o, err)
292 : }
293 :
294 1 : func (o *compactOp) String() string {
295 1 : return fmt.Sprintf("%s.Compact(%q, %q, %t /* parallelize */)", o.dbID, o.start, o.end, o.parallelize)
296 1 : }
297 :
298 1 : func (o *compactOp) receiver() objID { return o.dbID }
299 1 : func (o *compactOp) syncObjs() objIDSlice { return nil }
300 :
301 1 : func (o *compactOp) keys() []*[]byte {
302 1 : return []*[]byte{&o.start, &o.end}
303 1 : }
304 :
305 1 : func (o *compactOp) diagramKeyRanges() []pebble.KeyRange {
306 1 : return []pebble.KeyRange{{Start: o.start, End: o.end}}
307 1 : }
308 :
309 : // deleteOp models a Write.Delete operation.
310 : type deleteOp struct {
311 : writerID objID
312 : key []byte
313 :
314 : derivedDBID objID
315 : }
316 :
317 1 : func (o *deleteOp) run(t *Test, h historyRecorder) {
318 1 : w := t.getWriter(o.writerID)
319 1 : var err error
320 1 : if t.testOpts.deleteSized && t.isFMV(o.derivedDBID, pebble.FormatDeleteSizedAndObsolete) {
321 1 : // Call DeleteSized with a deterministic size derived from the index.
322 1 : // The size does not need to be accurate for correctness.
323 1 : err = w.DeleteSized(o.key, hashSize(t.idx), t.writeOpts)
324 1 : } else {
325 1 : err = w.Delete(o.key, t.writeOpts)
326 1 : }
327 1 : h.Recordf("%s // %v", o, err)
328 : }
329 :
330 1 : func hashSize(index int) uint32 {
331 1 : // Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
332 1 : return uint32((11400714819323198485 * uint64(index)) % maxValueSize)
333 1 : }
334 :
335 1 : func (o *deleteOp) String() string {
336 1 : return fmt.Sprintf("%s.Delete(%q)", o.writerID, o.key)
337 1 : }
338 1 : func (o *deleteOp) receiver() objID { return o.writerID }
339 1 : func (o *deleteOp) syncObjs() objIDSlice { return nil }
340 :
341 0 : func (o *deleteOp) keys() []*[]byte {
342 0 : return []*[]byte{&o.key}
343 0 : }
344 :
345 0 : func (o *deleteOp) diagramKeyRanges() []pebble.KeyRange {
346 0 : return []pebble.KeyRange{{Start: o.key, End: o.key}}
347 0 : }
348 :
349 : // singleDeleteOp models a Write.SingleDelete operation.
350 : type singleDeleteOp struct {
351 : writerID objID
352 : key []byte
353 : maybeReplaceDelete bool
354 : }
355 :
356 1 : func (o *singleDeleteOp) run(t *Test, h historyRecorder) {
357 1 : w := t.getWriter(o.writerID)
358 1 : var err error
359 1 : if t.testOpts.replaceSingleDelete && o.maybeReplaceDelete {
360 1 : err = w.Delete(o.key, t.writeOpts)
361 1 : } else {
362 1 : err = w.SingleDelete(o.key, t.writeOpts)
363 1 : }
364 : // NOTE: even if the SINGLEDEL was replaced with a DELETE, we must still
365 : // write the former to the history log. The log line will indicate whether
366 : // or not the delete *could* have been replaced. The OPTIONS file should
367 : // also be consulted to determine what happened at runtime (i.e. by taking
368 : // the logical AND).
369 1 : h.Recordf("%s // %v", o, err)
370 : }
371 :
372 1 : func (o *singleDeleteOp) String() string {
373 1 : return fmt.Sprintf("%s.SingleDelete(%q, %v /* maybeReplaceDelete */)", o.writerID, o.key, o.maybeReplaceDelete)
374 1 : }
375 :
376 1 : func (o *singleDeleteOp) receiver() objID { return o.writerID }
377 1 : func (o *singleDeleteOp) syncObjs() objIDSlice { return nil }
378 :
379 0 : func (o *singleDeleteOp) keys() []*[]byte {
380 0 : return []*[]byte{&o.key}
381 0 : }
382 :
383 0 : func (o *singleDeleteOp) diagramKeyRanges() []pebble.KeyRange {
384 0 : return []pebble.KeyRange{{Start: o.key, End: o.key}}
385 0 : }
386 :
387 : // deleteRangeOp models a Write.DeleteRange operation.
388 : type deleteRangeOp struct {
389 : writerID objID
390 : start []byte
391 : end []byte
392 : }
393 :
394 1 : func (o *deleteRangeOp) run(t *Test, h historyRecorder) {
395 1 : w := t.getWriter(o.writerID)
396 1 : err := w.DeleteRange(o.start, o.end, t.writeOpts)
397 1 : h.Recordf("%s // %v", o, err)
398 1 : }
399 :
400 1 : func (o *deleteRangeOp) String() string {
401 1 : return fmt.Sprintf("%s.DeleteRange(%q, %q)", o.writerID, o.start, o.end)
402 1 : }
403 :
404 1 : func (o *deleteRangeOp) receiver() objID { return o.writerID }
405 1 : func (o *deleteRangeOp) syncObjs() objIDSlice { return nil }
406 :
407 0 : func (o *deleteRangeOp) keys() []*[]byte {
408 0 : return []*[]byte{&o.start, &o.end}
409 0 : }
410 :
411 0 : func (o *deleteRangeOp) diagramKeyRanges() []pebble.KeyRange {
412 0 : return []pebble.KeyRange{{Start: o.start, End: o.end}}
413 0 : }
414 :
415 : // flushOp models a DB.Flush operation.
416 : type flushOp struct {
417 : db objID
418 : }
419 :
420 1 : func (o *flushOp) run(t *Test, h historyRecorder) {
421 1 : db := t.getDB(o.db)
422 1 : err := db.Flush()
423 1 : h.Recordf("%s // %v", o, err)
424 1 : }
425 :
426 1 : func (o *flushOp) String() string { return fmt.Sprintf("%s.Flush()", o.db) }
427 1 : func (o *flushOp) receiver() objID { return o.db }
428 1 : func (o *flushOp) syncObjs() objIDSlice { return nil }
429 0 : func (o *flushOp) keys() []*[]byte { return nil }
430 0 : func (o *flushOp) diagramKeyRanges() []pebble.KeyRange { return nil }
431 :
432 : // mergeOp models a Write.Merge operation.
433 : type mergeOp struct {
434 : writerID objID
435 : key []byte
436 : value []byte
437 : }
438 :
439 1 : func (o *mergeOp) run(t *Test, h historyRecorder) {
440 1 : w := t.getWriter(o.writerID)
441 1 : err := w.Merge(o.key, o.value, t.writeOpts)
442 1 : h.Recordf("%s // %v", o, err)
443 1 : }
444 :
445 1 : func (o *mergeOp) String() string { return fmt.Sprintf("%s.Merge(%q, %q)", o.writerID, o.key, o.value) }
446 1 : func (o *mergeOp) receiver() objID { return o.writerID }
447 1 : func (o *mergeOp) syncObjs() objIDSlice { return nil }
448 :
449 0 : func (o *mergeOp) keys() []*[]byte {
450 0 : return []*[]byte{&o.key}
451 0 : }
452 :
453 0 : func (o *mergeOp) diagramKeyRanges() []pebble.KeyRange {
454 0 : return []pebble.KeyRange{{Start: o.key, End: o.key}}
455 0 : }
456 :
457 : // setOp models a Write.Set operation.
458 : type setOp struct {
459 : writerID objID
460 : key []byte
461 : value []byte
462 : }
463 :
464 1 : func (o *setOp) run(t *Test, h historyRecorder) {
465 1 : w := t.getWriter(o.writerID)
466 1 : err := w.Set(o.key, o.value, t.writeOpts)
467 1 : h.Recordf("%s // %v", o, err)
468 1 : }
469 :
470 1 : func (o *setOp) String() string { return fmt.Sprintf("%s.Set(%q, %q)", o.writerID, o.key, o.value) }
471 1 : func (o *setOp) receiver() objID { return o.writerID }
472 1 : func (o *setOp) syncObjs() objIDSlice { return nil }
473 :
474 0 : func (o *setOp) keys() []*[]byte {
475 0 : return []*[]byte{&o.key}
476 0 : }
477 :
478 0 : func (o *setOp) diagramKeyRanges() []pebble.KeyRange {
479 0 : return []pebble.KeyRange{{Start: o.key, End: o.key}}
480 0 : }
481 :
482 : // rangeKeyDeleteOp models a Write.RangeKeyDelete operation.
483 : type rangeKeyDeleteOp struct {
484 : writerID objID
485 : start []byte
486 : end []byte
487 : }
488 :
489 1 : func (o *rangeKeyDeleteOp) run(t *Test, h historyRecorder) {
490 1 : w := t.getWriter(o.writerID)
491 1 : err := w.RangeKeyDelete(o.start, o.end, t.writeOpts)
492 1 : h.Recordf("%s // %v", o, err)
493 1 : }
494 :
495 1 : func (o *rangeKeyDeleteOp) String() string {
496 1 : return fmt.Sprintf("%s.RangeKeyDelete(%q, %q)", o.writerID, o.start, o.end)
497 1 : }
498 :
499 1 : func (o *rangeKeyDeleteOp) receiver() objID { return o.writerID }
500 1 : func (o *rangeKeyDeleteOp) syncObjs() objIDSlice { return nil }
501 :
502 0 : func (o *rangeKeyDeleteOp) keys() []*[]byte {
503 0 : return []*[]byte{&o.start, &o.end}
504 0 : }
505 :
506 1 : func (o *rangeKeyDeleteOp) diagramKeyRanges() []pebble.KeyRange {
507 1 : return []pebble.KeyRange{{Start: o.start, End: o.end}}
508 1 : }
509 :
510 : // rangeKeySetOp models a Write.RangeKeySet operation.
511 : type rangeKeySetOp struct {
512 : writerID objID
513 : start []byte
514 : end []byte
515 : suffix []byte
516 : value []byte
517 : }
518 :
519 1 : func (o *rangeKeySetOp) run(t *Test, h historyRecorder) {
520 1 : w := t.getWriter(o.writerID)
521 1 : err := w.RangeKeySet(o.start, o.end, o.suffix, o.value, t.writeOpts)
522 1 : h.Recordf("%s // %v", o, err)
523 1 : }
524 :
525 1 : func (o *rangeKeySetOp) String() string {
526 1 : return fmt.Sprintf("%s.RangeKeySet(%q, %q, %q, %q)",
527 1 : o.writerID, o.start, o.end, o.suffix, o.value)
528 1 : }
529 :
530 1 : func (o *rangeKeySetOp) receiver() objID { return o.writerID }
531 1 : func (o *rangeKeySetOp) syncObjs() objIDSlice { return nil }
532 :
533 1 : func (o *rangeKeySetOp) keys() []*[]byte {
534 1 : return []*[]byte{&o.start, &o.end}
535 1 : }
536 :
537 1 : func (o *rangeKeySetOp) diagramKeyRanges() []pebble.KeyRange {
538 1 : return []pebble.KeyRange{{Start: o.start, End: o.end}}
539 1 : }
540 :
541 : // rangeKeyUnsetOp models a Write.RangeKeyUnset operation.
542 : type rangeKeyUnsetOp struct {
543 : writerID objID
544 : start []byte
545 : end []byte
546 : suffix []byte
547 : }
548 :
549 1 : func (o *rangeKeyUnsetOp) run(t *Test, h historyRecorder) {
550 1 : w := t.getWriter(o.writerID)
551 1 : err := w.RangeKeyUnset(o.start, o.end, o.suffix, t.writeOpts)
552 1 : h.Recordf("%s // %v", o, err)
553 1 : }
554 :
555 1 : func (o *rangeKeyUnsetOp) String() string {
556 1 : return fmt.Sprintf("%s.RangeKeyUnset(%q, %q, %q)",
557 1 : o.writerID, o.start, o.end, o.suffix)
558 1 : }
559 :
560 1 : func (o *rangeKeyUnsetOp) receiver() objID { return o.writerID }
561 1 : func (o *rangeKeyUnsetOp) syncObjs() objIDSlice { return nil }
562 :
563 0 : func (o *rangeKeyUnsetOp) keys() []*[]byte {
564 0 : return []*[]byte{&o.start, &o.end}
565 0 : }
566 :
567 0 : func (o *rangeKeyUnsetOp) diagramKeyRanges() []pebble.KeyRange {
568 0 : return []pebble.KeyRange{{Start: o.start, End: o.end}}
569 0 : }
570 :
571 : // logDataOp models a Writer.LogData operation.
572 : type logDataOp struct {
573 : writerID objID
574 : data []byte
575 : }
576 :
577 1 : func (o *logDataOp) run(t *Test, h historyRecorder) {
578 1 : w := t.getWriter(o.writerID)
579 1 : err := w.LogData(o.data, t.writeOpts)
580 1 : h.Recordf("%s // %v", o, err)
581 1 : }
582 :
583 1 : func (o *logDataOp) String() string {
584 1 : return fmt.Sprintf("%s.LogData(%q)", o.writerID, o.data)
585 1 : }
586 :
587 1 : func (o *logDataOp) receiver() objID { return o.writerID }
588 1 : func (o *logDataOp) syncObjs() objIDSlice { return nil }
589 0 : func (o *logDataOp) keys() []*[]byte { return []*[]byte{} }
590 0 : func (o *logDataOp) diagramKeyRanges() []pebble.KeyRange { return []pebble.KeyRange{} }
591 :
592 : // newBatchOp models a Write.NewBatch operation.
593 : type newBatchOp struct {
594 : dbID objID
595 : batchID objID
596 : }
597 :
598 1 : func (o *newBatchOp) run(t *Test, h historyRecorder) {
599 1 : b := t.getDB(o.dbID).NewBatch()
600 1 : t.setBatch(o.batchID, b)
601 1 : h.Recordf("%s", o)
602 1 : }
603 :
604 1 : func (o *newBatchOp) String() string { return fmt.Sprintf("%s = %s.NewBatch()", o.batchID, o.dbID) }
605 1 : func (o *newBatchOp) receiver() objID { return o.dbID }
606 1 : func (o *newBatchOp) syncObjs() objIDSlice {
607 1 : // NewBatch should not be concurrent with operations that interact with that
608 1 : // same batch.
609 1 : return []objID{o.batchID}
610 1 : }
611 :
612 0 : func (o *newBatchOp) keys() []*[]byte { return nil }
613 0 : func (o *newBatchOp) diagramKeyRanges() []pebble.KeyRange { return nil }
614 :
615 : // newIndexedBatchOp models a Write.NewIndexedBatch operation.
616 : type newIndexedBatchOp struct {
617 : dbID objID
618 : batchID objID
619 : }
620 :
621 1 : func (o *newIndexedBatchOp) run(t *Test, h historyRecorder) {
622 1 : b := t.getDB(o.dbID).NewIndexedBatch()
623 1 : t.setBatch(o.batchID, b)
624 1 : h.Recordf("%s", o)
625 1 : }
626 :
627 1 : func (o *newIndexedBatchOp) String() string {
628 1 : return fmt.Sprintf("%s = %s.NewIndexedBatch()", o.batchID, o.dbID)
629 1 : }
630 1 : func (o *newIndexedBatchOp) receiver() objID { return o.dbID }
631 1 : func (o *newIndexedBatchOp) syncObjs() objIDSlice {
632 1 : // NewIndexedBatch should not be concurrent with operations that interact
633 1 : // with that same batch.
634 1 : return []objID{o.batchID}
635 1 : }
636 :
637 0 : func (o *newIndexedBatchOp) keys() []*[]byte { return nil }
638 0 : func (o *newIndexedBatchOp) diagramKeyRanges() []pebble.KeyRange { return nil }
639 :
640 : // batchCommitOp models a Batch.Commit operation.
641 : type batchCommitOp struct {
642 : dbID objID
643 : batchID objID
644 : }
645 :
646 1 : func (o *batchCommitOp) run(t *Test, h historyRecorder) {
647 1 : b := t.getBatch(o.batchID)
648 1 : err := b.Commit(t.writeOpts)
649 1 : h.Recordf("%s // %v", o, err)
650 1 : }
651 :
652 1 : func (o *batchCommitOp) String() string { return fmt.Sprintf("%s.Commit()", o.batchID) }
653 1 : func (o *batchCommitOp) receiver() objID { return o.batchID }
654 1 : func (o *batchCommitOp) syncObjs() objIDSlice {
655 1 : // Synchronize on the database so that NewIters wait for the commit.
656 1 : return []objID{o.dbID}
657 1 : }
658 :
659 0 : func (o *batchCommitOp) keys() []*[]byte { return nil }
660 0 : func (o *batchCommitOp) diagramKeyRanges() []pebble.KeyRange { return nil }
661 :
662 : // ingestOp models a DB.Ingest operation.
663 : type ingestOp struct {
664 : dbID objID
665 : batchIDs []objID
666 :
667 : derivedDBIDs []objID
668 : }
669 :
670 1 : func (o *ingestOp) run(t *Test, h historyRecorder) {
671 1 : // We can only use apply as an alternative for ingestion if we are ingesting
672 1 : // a single batch. If we are ingesting multiple batches, the batches may
673 1 : // overlap which would cause ingestion to fail but apply would succeed.
674 1 : if t.testOpts.ingestUsingApply && len(o.batchIDs) == 1 && o.derivedDBIDs[0] == o.dbID {
675 1 : id := o.batchIDs[0]
676 1 : b := t.getBatch(id)
677 1 : iter, rangeDelIter, rangeKeyIter := private.BatchSort(b)
678 1 : db := t.getDB(o.dbID)
679 1 : c, err := o.collapseBatch(t, db, iter, rangeDelIter, rangeKeyIter, b)
680 1 : if err == nil {
681 1 : err = db.Apply(c, t.writeOpts)
682 1 : }
683 1 : _ = b.Close()
684 1 : _ = c.Close()
685 1 : t.clearObj(id)
686 1 : h.Recordf("%s // %v", o, err)
687 1 : return
688 : }
689 :
690 1 : var paths []string
691 1 : var err error
692 1 : for i, id := range o.batchIDs {
693 1 : b := t.getBatch(id)
694 1 : t.clearObj(id)
695 1 : path, _, err2 := buildForIngest(t, o.dbID, b, i)
696 1 : if err2 != nil {
697 0 : h.Recordf("Build(%s) // %v", id, err2)
698 0 : }
699 1 : err = firstError(err, err2)
700 1 : if err2 == nil {
701 1 : paths = append(paths, path)
702 1 : }
703 1 : err = firstError(err, b.Close())
704 : }
705 :
706 1 : err = firstError(err, t.withRetries(func() error {
707 1 : return t.getDB(o.dbID).Ingest(paths)
708 1 : }))
709 :
710 1 : h.Recordf("%s // %v", o, err)
711 : }
712 :
713 1 : func (o *ingestOp) receiver() objID { return o.dbID }
714 1 : func (o *ingestOp) syncObjs() objIDSlice {
715 1 : // Ingest should not be concurrent with mutating the batches that will be
716 1 : // ingested as sstables.
717 1 : objs := make([]objID, 0, len(o.batchIDs)+1)
718 1 : objs = append(objs, o.batchIDs...)
719 1 : addedDBs := make(map[objID]struct{})
720 1 : for i := range o.derivedDBIDs {
721 1 : _, ok := addedDBs[o.derivedDBIDs[i]]
722 1 : if !ok && o.derivedDBIDs[i] != o.dbID {
723 0 : objs = append(objs, o.derivedDBIDs[i])
724 0 : addedDBs[o.derivedDBIDs[i]] = struct{}{}
725 0 : }
726 : }
727 1 : return objs
728 : }
729 :
730 : func closeIters(
731 : pointIter base.InternalIterator,
732 : rangeDelIter keyspan.FragmentIterator,
733 : rangeKeyIter keyspan.FragmentIterator,
734 1 : ) {
735 1 : if pointIter != nil {
736 1 : pointIter.Close()
737 1 : }
738 1 : if rangeDelIter != nil {
739 1 : rangeDelIter.Close()
740 1 : }
741 1 : if rangeKeyIter != nil {
742 1 : rangeKeyIter.Close()
743 1 : }
744 : }
745 :
746 : // collapseBatch collapses the mutations in a batch to be equivalent to an
747 : // sstable ingesting those mutations. Duplicate updates to a key are collapsed
748 : // so that only the latest update is performed. All range deletions are
749 : // performed first in the batch to match the semantics of ingestion where a
750 : // range deletion does not delete a point record contained in the sstable.
751 : func (o *ingestOp) collapseBatch(
752 : t *Test,
753 : db *pebble.DB,
754 : pointIter base.InternalIterator,
755 : rangeDelIter, rangeKeyIter keyspan.FragmentIterator,
756 : b *pebble.Batch,
757 1 : ) (*pebble.Batch, error) {
758 1 : defer closeIters(pointIter, rangeDelIter, rangeKeyIter)
759 1 : equal := t.opts.Comparer.Equal
760 1 : collapsed := db.NewBatch()
761 1 :
762 1 : if rangeDelIter != nil {
763 1 : // NB: The range tombstones have already been fragmented by the Batch.
764 1 : t, err := rangeDelIter.First()
765 1 : for ; t != nil; t, err = rangeDelIter.Next() {
766 1 : // NB: We don't have to copy the key or value since we're reading from a
767 1 : // batch which doesn't do prefix compression.
768 1 : if err := collapsed.DeleteRange(t.Start, t.End, nil); err != nil {
769 0 : return nil, err
770 0 : }
771 : }
772 1 : if err != nil {
773 0 : return nil, err
774 0 : }
775 1 : if err := rangeDelIter.Close(); err != nil {
776 0 : return nil, err
777 0 : }
778 1 : rangeDelIter = nil
779 : }
780 :
781 1 : if pointIter != nil {
782 1 : var lastUserKey []byte
783 1 : for kv := pointIter.First(); kv != nil; kv = pointIter.Next() {
784 1 : // Ignore duplicate keys.
785 1 : //
786 1 : // Note: this is necessary due to MERGE keys, otherwise it would be
787 1 : // fine to include all the keys in the batch and let the normal
788 1 : // sequence number precedence determine which of the keys "wins".
789 1 : // But the code to build the ingested sstable will only keep the
790 1 : // most recent internal key and will not merge across internal keys.
791 1 : if equal(lastUserKey, kv.K.UserKey) {
792 0 : continue
793 : }
794 : // NB: We don't have to copy the key or value since we're reading from a
795 : // batch which doesn't do prefix compression.
796 1 : lastUserKey = kv.K.UserKey
797 1 :
798 1 : var err error
799 1 : switch kv.Kind() {
800 1 : case pebble.InternalKeyKindDelete:
801 1 : err = collapsed.Delete(kv.K.UserKey, nil)
802 1 : case pebble.InternalKeyKindDeleteSized:
803 1 : v, _ := binary.Uvarint(kv.InPlaceValue())
804 1 : // Batch.DeleteSized takes just the length of the value being
805 1 : // deleted and adds the key's length to derive the overall entry
806 1 : // size of the value being deleted. This has already been done
807 1 : // to the key we're reading from the batch, so we must subtract
808 1 : // the key length from the encoded value before calling
809 1 : // collapsed.DeleteSized, which will again add the key length
810 1 : // before encoding.
811 1 : err = collapsed.DeleteSized(kv.K.UserKey, uint32(v-uint64(len(kv.K.UserKey))), nil)
812 1 : case pebble.InternalKeyKindSingleDelete:
813 1 : err = collapsed.SingleDelete(kv.K.UserKey, nil)
814 1 : case pebble.InternalKeyKindSet:
815 1 : err = collapsed.Set(kv.K.UserKey, kv.InPlaceValue(), nil)
816 1 : case pebble.InternalKeyKindMerge:
817 1 : err = collapsed.Merge(kv.K.UserKey, kv.InPlaceValue(), nil)
818 0 : case pebble.InternalKeyKindLogData:
819 0 : err = collapsed.LogData(kv.K.UserKey, nil)
820 0 : default:
821 0 : err = errors.Errorf("unknown batch record kind: %d", kv.Kind())
822 : }
823 1 : if err != nil {
824 0 : return nil, err
825 0 : }
826 : }
827 1 : if err := pointIter.Close(); err != nil {
828 0 : return nil, err
829 0 : }
830 1 : pointIter = nil
831 : }
832 :
833 : // There's no equivalent of a MERGE operator for range keys, so there's no
834 : // need to collapse the range keys here. Rather than reading the range keys
835 : // from `rangeKeyIter`, which will already be fragmented, read the range
836 : // keys from the batch and copy them verbatim. This marginally improves our
837 : // test coverage over the alternative approach of pre-fragmenting and
838 : // pre-coalescing before writing to the batch.
839 : //
840 : // The `rangeKeyIter` is used only to determine if there are any range keys
841 : // in the batch at all, and only because we already have it handy from
842 : // private.BatchSort.
843 1 : if rangeKeyIter != nil {
844 1 : for r := b.Reader(); ; {
845 1 : kind, key, value, ok, err := r.Next()
846 1 : if !ok {
847 1 : if err != nil {
848 0 : return nil, err
849 0 : }
850 1 : break
851 1 : } else if !rangekey.IsRangeKey(kind) {
852 1 : continue
853 : }
854 1 : ik := base.MakeInternalKey(key, 0, kind)
855 1 : if err := collapsed.AddInternalKey(&ik, value, nil); err != nil {
856 0 : return nil, err
857 0 : }
858 : }
859 1 : if err := rangeKeyIter.Close(); err != nil {
860 0 : return nil, err
861 0 : }
862 1 : rangeKeyIter = nil
863 : }
864 :
865 1 : return collapsed, nil
866 : }
867 :
868 1 : func (o *ingestOp) String() string {
869 1 : var buf strings.Builder
870 1 : buf.WriteString(o.dbID.String())
871 1 : buf.WriteString(".Ingest(")
872 1 : for i, id := range o.batchIDs {
873 1 : if i > 0 {
874 1 : buf.WriteString(", ")
875 1 : }
876 1 : buf.WriteString(id.String())
877 : }
878 1 : buf.WriteString(")")
879 1 : return buf.String()
880 : }
881 :
882 0 : func (o *ingestOp) keys() []*[]byte { return nil }
883 0 : func (o *ingestOp) diagramKeyRanges() []pebble.KeyRange { return nil }
884 :
885 : type ingestAndExciseOp struct {
886 : dbID objID
887 : batchID objID
888 : derivedDBID objID
889 : exciseStart, exciseEnd []byte
890 : sstContainsExciseTombstone bool
891 : }
892 :
893 1 : func (o *ingestAndExciseOp) run(t *Test, h historyRecorder) {
894 1 : var err error
895 1 : b := t.getBatch(o.batchID)
896 1 : t.clearObj(o.batchID)
897 1 : if t.testOpts.Opts.Comparer.Compare(o.exciseEnd, o.exciseStart) <= 0 {
898 0 : panic("non-well-formed excise span")
899 : }
900 1 : db := t.getDB(o.dbID)
901 1 : if b.Empty() {
902 1 : h.Recordf("%s // %v", o, o.simulateExcise(db, t))
903 1 : return
904 1 : }
905 :
906 1 : if o.sstContainsExciseTombstone {
907 1 : // Add a rangedel and rangekeydel to the batch. This ensures it'll end up
908 1 : // inside the sstable. Note that all entries in the sstable will have the
909 1 : // same sequence number, so the ordering within the batch doesn't matter.
910 1 : err = firstError(err, b.DeleteRange(o.exciseStart, o.exciseEnd, t.writeOpts))
911 1 : err = firstError(err, b.RangeKeyDelete(o.exciseStart, o.exciseEnd, t.writeOpts))
912 1 : }
913 1 : path, writerMeta, err2 := buildForIngest(t, o.dbID, b, 0 /* i */)
914 1 : if err2 != nil {
915 0 : h.Recordf("Build(%s) // %v", o.batchID, err2)
916 0 : return
917 0 : }
918 1 : err = firstError(err, b.Close())
919 1 :
920 1 : if writerMeta.Properties.NumEntries == 0 && writerMeta.Properties.NumRangeKeys() == 0 {
921 1 : h.Recordf("%s // %v", o, o.simulateExcise(db, t))
922 1 : return
923 1 : }
924 :
925 1 : if t.testOpts.useExcise {
926 1 : err = firstError(err, t.withRetries(func() error {
927 1 : _, err := db.IngestAndExcise([]string{path}, nil /* shared */, nil /* external */, pebble.KeyRange{
928 1 : Start: o.exciseStart,
929 1 : End: o.exciseEnd,
930 1 : }, o.sstContainsExciseTombstone)
931 1 : return err
932 1 : }))
933 1 : } else {
934 1 : err = firstError(err, o.simulateExcise(db, t))
935 1 : err = firstError(err, t.withRetries(func() error {
936 1 : return db.Ingest([]string{path})
937 1 : }))
938 : }
939 :
940 1 : h.Recordf("%s // %v", o, err)
941 : }
942 :
943 1 : func (o *ingestAndExciseOp) simulateExcise(db *pebble.DB, t *Test) error {
944 1 : // Simulate the excise using a DeleteRange and RangeKeyDelete.
945 1 : return errors.CombineErrors(
946 1 : db.DeleteRange(o.exciseStart, o.exciseEnd, t.writeOpts),
947 1 : db.RangeKeyDelete(o.exciseStart, o.exciseEnd, t.writeOpts),
948 1 : )
949 1 : }
950 :
951 1 : func (o *ingestAndExciseOp) receiver() objID { return o.dbID }
952 1 : func (o *ingestAndExciseOp) syncObjs() objIDSlice {
953 1 : // Ingest should not be concurrent with mutating the batches that will be
954 1 : // ingested as sstables.
955 1 : objs := []objID{o.batchID}
956 1 : if o.derivedDBID != o.dbID {
957 0 : objs = append(objs, o.derivedDBID)
958 0 : }
959 1 : return objs
960 : }
961 :
962 1 : func (o *ingestAndExciseOp) String() string {
963 1 : return fmt.Sprintf("%s.IngestAndExcise(%s, %q, %q, %t /* sstContainsExciseTombstone */)", o.dbID, o.batchID, o.exciseStart, o.exciseEnd, o.sstContainsExciseTombstone)
964 1 : }
965 :
966 0 : func (o *ingestAndExciseOp) keys() []*[]byte {
967 0 : return []*[]byte{&o.exciseStart, &o.exciseEnd}
968 0 : }
969 :
970 0 : func (o *ingestAndExciseOp) diagramKeyRanges() []pebble.KeyRange {
971 0 : return []pebble.KeyRange{{Start: o.exciseStart, End: o.exciseEnd}}
972 0 : }
973 :
974 : // ingestExternalFilesOp models a DB.IngestExternalFiles operation.
975 : //
976 : // When remote storage is not enabled, the operation is emulated using the
977 : // regular DB.Ingest; this serves as a cross-check of the result.
978 : type ingestExternalFilesOp struct {
979 : dbID objID
980 : // The bounds of the objects cannot overlap.
981 : objs []externalObjWithBounds
982 : }
983 :
984 : type externalObjWithBounds struct {
985 : externalObjID objID
986 :
987 : // bounds for the external object. These bounds apply after keys undergo
988 : // any prefix or suffix transforms.
989 : bounds pebble.KeyRange
990 :
991 : syntheticPrefix sstable.SyntheticPrefix
992 : syntheticSuffix sstable.SyntheticSuffix
993 : }
994 :
995 1 : func (o *ingestExternalFilesOp) run(t *Test, h historyRecorder) {
996 1 : db := t.getDB(o.dbID)
997 1 :
998 1 : var err error
999 1 : if !t.testOpts.externalStorageEnabled {
1000 1 : // Emulate the operation by crating local, truncated SST files and ingesting
1001 1 : // them.
1002 1 : var paths []string
1003 1 : for i, obj := range o.objs {
1004 1 : // Make sure the object exists and is not empty.
1005 1 : path, sstMeta := buildForIngestExternalEmulation(
1006 1 : t, o.dbID, obj.externalObjID, obj.bounds, obj.syntheticSuffix, obj.syntheticPrefix, i,
1007 1 : )
1008 1 : if sstMeta.HasPointKeys || sstMeta.HasRangeKeys || sstMeta.HasRangeDelKeys {
1009 1 : paths = append(paths, path)
1010 1 : }
1011 : }
1012 1 : if len(paths) > 0 {
1013 1 : err = db.Ingest(paths)
1014 1 : }
1015 1 : } else {
1016 1 : external := make([]pebble.ExternalFile, len(o.objs))
1017 1 : for i, obj := range o.objs {
1018 1 : meta := t.getExternalObj(obj.externalObjID)
1019 1 : external[i] = pebble.ExternalFile{
1020 1 : Locator: "external",
1021 1 : ObjName: externalObjName(obj.externalObjID),
1022 1 : Size: meta.sstMeta.Size,
1023 1 : StartKey: obj.bounds.Start,
1024 1 : EndKey: obj.bounds.End,
1025 1 : EndKeyIsInclusive: false,
1026 1 : // Note: if the table has point/range keys, we don't know for sure whether
1027 1 : // this particular range has any, but that's acceptable.
1028 1 : HasPointKey: meta.sstMeta.HasPointKeys || meta.sstMeta.HasRangeDelKeys,
1029 1 : HasRangeKey: meta.sstMeta.HasRangeKeys,
1030 1 : SyntheticSuffix: obj.syntheticSuffix,
1031 1 : }
1032 1 : if obj.syntheticPrefix.IsSet() {
1033 1 : external[i].SyntheticPrefix = obj.syntheticPrefix
1034 1 : }
1035 : }
1036 1 : _, err = db.IngestExternalFiles(external)
1037 : }
1038 :
1039 1 : h.Recordf("%s // %v", o, err)
1040 : }
1041 :
1042 1 : func (o *ingestExternalFilesOp) receiver() objID { return o.dbID }
1043 1 : func (o *ingestExternalFilesOp) syncObjs() objIDSlice {
1044 1 : res := make(objIDSlice, len(o.objs))
1045 1 : for i := range res {
1046 1 : res[i] = o.objs[i].externalObjID
1047 1 : }
1048 : // Deduplicate the IDs.
1049 1 : slices.Sort(res)
1050 1 : return slices.Compact(res)
1051 : }
1052 :
1053 1 : func (o *ingestExternalFilesOp) String() string {
1054 1 : strs := make([]string, len(o.objs))
1055 1 : for i, obj := range o.objs {
1056 1 : strs[i] = fmt.Sprintf("%s, %q /* start */, %q /* end */, %q /* syntheticSuffix */, %q /* syntheticPrefix */",
1057 1 : obj.externalObjID, obj.bounds.Start, obj.bounds.End, obj.syntheticSuffix, obj.syntheticPrefix,
1058 1 : )
1059 1 : }
1060 1 : return fmt.Sprintf("%s.IngestExternalFiles(%s)", o.dbID, strings.Join(strs, ", "))
1061 : }
1062 :
1063 0 : func (o *ingestExternalFilesOp) keys() []*[]byte {
1064 0 : // If any of the objects have synthetic prefixes, we can't allow modification
1065 0 : // of external object bounds.
1066 0 : for i := range o.objs {
1067 0 : if o.objs[i].syntheticPrefix.IsSet() {
1068 0 : return nil
1069 0 : }
1070 : }
1071 :
1072 0 : var res []*[]byte
1073 0 : for i := range o.objs {
1074 0 : res = append(res, &o.objs[i].bounds.Start, &o.objs[i].bounds.End)
1075 0 : }
1076 0 : return res
1077 : }
1078 :
1079 0 : func (o *ingestExternalFilesOp) diagramKeyRanges() []pebble.KeyRange {
1080 0 : ranges := make([]pebble.KeyRange, len(o.objs))
1081 0 : for i, obj := range o.objs {
1082 0 : ranges[i] = obj.bounds
1083 0 : }
1084 0 : return ranges
1085 : }
1086 :
1087 : // getOp models a Reader.Get operation.
1088 : type getOp struct {
1089 : readerID objID
1090 : key []byte
1091 : derivedDBID objID
1092 : }
1093 :
1094 1 : func (o *getOp) run(t *Test, h historyRecorder) {
1095 1 : r := t.getReader(o.readerID)
1096 1 : var val []byte
1097 1 : var closer io.Closer
1098 1 : err := t.withRetries(func() (err error) {
1099 1 : val, closer, err = r.Get(o.key)
1100 1 : return err
1101 1 : })
1102 1 : h.Recordf("%s // [%q] %v", o, val, err)
1103 1 : if closer != nil {
1104 1 : closer.Close()
1105 1 : }
1106 : }
1107 :
1108 1 : func (o *getOp) String() string { return fmt.Sprintf("%s.Get(%q)", o.readerID, o.key) }
1109 1 : func (o *getOp) receiver() objID { return o.readerID }
1110 1 : func (o *getOp) syncObjs() objIDSlice {
1111 1 : if o.readerID.tag() == dbTag {
1112 1 : return nil
1113 1 : }
1114 : // batch.Get reads through to the current database state.
1115 1 : if o.derivedDBID != 0 {
1116 1 : return []objID{o.derivedDBID}
1117 1 : }
1118 0 : return nil
1119 : }
1120 :
1121 0 : func (o *getOp) keys() []*[]byte {
1122 0 : return []*[]byte{&o.key}
1123 0 : }
1124 :
1125 0 : func (o *getOp) diagramKeyRanges() []pebble.KeyRange {
1126 0 : return []pebble.KeyRange{{Start: o.key, End: o.key}}
1127 0 : }
1128 :
1129 : // newIterOp models a Reader.NewIter operation.
1130 : type newIterOp struct {
1131 : readerID objID
1132 : iterID objID
1133 : iterOpts
1134 : derivedDBID objID
1135 : }
1136 :
1137 : // Enable this to enable debug logging of range key iterator operations.
1138 : const debugIterators = false
1139 :
1140 1 : func (o *newIterOp) run(t *Test, h historyRecorder) {
1141 1 : r := t.getReader(o.readerID)
1142 1 : opts := iterOptions(o.iterOpts)
1143 1 : if debugIterators {
1144 0 : opts.DebugRangeKeyStack = true
1145 0 : }
1146 :
1147 1 : var i *pebble.Iterator
1148 1 : for {
1149 1 : i, _ = r.NewIter(opts)
1150 1 : if err := i.Error(); !errors.Is(err, errorfs.ErrInjected) {
1151 1 : break
1152 : }
1153 : // close this iter and retry NewIter
1154 0 : _ = i.Close()
1155 : }
1156 1 : t.setIter(o.iterID, i)
1157 1 :
1158 1 : // Trash the bounds to ensure that Pebble doesn't rely on the stability of
1159 1 : // the user-provided bounds.
1160 1 : if opts != nil {
1161 1 : rand.Read(opts.LowerBound[:])
1162 1 : rand.Read(opts.UpperBound[:])
1163 1 : }
1164 1 : h.Recordf("%s // %v", o, i.Error())
1165 : }
1166 :
1167 1 : func (o *newIterOp) String() string {
1168 1 : return fmt.Sprintf("%s = %s.NewIter(%q, %q, %d /* key types */, %d, %d, %t /* use L6 filters */, %q /* masking suffix */)",
1169 1 : o.iterID, o.readerID, o.lower, o.upper, o.keyTypes, o.filterMin, o.filterMax, o.useL6Filters, o.maskSuffix)
1170 1 : }
1171 :
1172 1 : func (o *newIterOp) receiver() objID { return o.readerID }
1173 1 : func (o *newIterOp) syncObjs() objIDSlice {
1174 1 : // Prevent o.iterID ops from running before it exists.
1175 1 : objs := []objID{o.iterID}
1176 1 : // If reading through a batch or snapshot, the new iterator will also observe database
1177 1 : // state, and we must synchronize on the database state for a consistent
1178 1 : // view.
1179 1 : if o.readerID.tag() == batchTag || o.readerID.tag() == snapTag {
1180 1 : objs = append(objs, o.derivedDBID)
1181 1 : }
1182 1 : return objs
1183 : }
1184 :
1185 0 : func (o *newIterOp) keys() []*[]byte {
1186 0 : var res []*[]byte
1187 0 : if o.lower != nil {
1188 0 : res = append(res, &o.lower)
1189 0 : }
1190 0 : if o.upper != nil {
1191 0 : res = append(res, &o.upper)
1192 0 : }
1193 0 : return res
1194 : }
1195 :
1196 1 : func (o *newIterOp) diagramKeyRanges() []pebble.KeyRange {
1197 1 : var res []pebble.KeyRange
1198 1 : if o.lower != nil {
1199 0 : res = append(res, pebble.KeyRange{Start: o.lower, End: o.lower})
1200 0 : }
1201 1 : if o.upper != nil {
1202 0 : res = append(res, pebble.KeyRange{Start: o.upper, End: o.upper})
1203 0 : }
1204 1 : return res
1205 : }
1206 :
1207 : // newIterUsingCloneOp models a Iterator.Clone operation.
1208 : type newIterUsingCloneOp struct {
1209 : existingIterID objID
1210 : iterID objID
1211 : refreshBatch bool
1212 : iterOpts
1213 :
1214 : // derivedReaderID is the ID of the underlying reader that backs both the
1215 : // existing iterator and the new iterator. The derivedReaderID is NOT
1216 : // serialized by String and is derived from other operations during parse.
1217 : derivedReaderID objID
1218 : }
1219 :
1220 1 : func (o *newIterUsingCloneOp) run(t *Test, h historyRecorder) {
1221 1 : iter := t.getIter(o.existingIterID)
1222 1 : cloneOpts := pebble.CloneOptions{
1223 1 : IterOptions: iterOptions(o.iterOpts),
1224 1 : RefreshBatchView: o.refreshBatch,
1225 1 : }
1226 1 : i, err := iter.iter.Clone(cloneOpts)
1227 1 : if err != nil {
1228 0 : panic(err)
1229 : }
1230 1 : t.setIter(o.iterID, i)
1231 1 : h.Recordf("%s // %v", o, i.Error())
1232 : }
1233 :
1234 1 : func (o *newIterUsingCloneOp) String() string {
1235 1 : return fmt.Sprintf("%s = %s.Clone(%t, %q, %q, %d /* key types */, %d, %d, %t /* use L6 filters */, %q /* masking suffix */)",
1236 1 : o.iterID, o.existingIterID, o.refreshBatch, o.lower, o.upper,
1237 1 : o.keyTypes, o.filterMin, o.filterMax, o.useL6Filters, o.maskSuffix)
1238 1 : }
1239 :
1240 1 : func (o *newIterUsingCloneOp) receiver() objID { return o.existingIterID }
1241 :
1242 1 : func (o *newIterUsingCloneOp) syncObjs() objIDSlice {
1243 1 : objIDs := []objID{o.iterID}
1244 1 : // If the underlying reader is a batch, we must synchronize with the batch.
1245 1 : // If refreshBatch=true, synchronizing is necessary to observe all the
1246 1 : // mutations up to until this op and no more. Even when refreshBatch=false,
1247 1 : // we must synchronize because iterator construction may access state cached
1248 1 : // on the indexed batch to avoid refragmenting range tombstones or range
1249 1 : // keys.
1250 1 : if o.derivedReaderID.tag() == batchTag {
1251 0 : objIDs = append(objIDs, o.derivedReaderID)
1252 0 : }
1253 1 : return objIDs
1254 : }
1255 :
1256 0 : func (o *newIterUsingCloneOp) keys() []*[]byte { return nil }
1257 0 : func (o *newIterUsingCloneOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1258 :
1259 : // iterSetBoundsOp models an Iterator.SetBounds operation.
1260 : type iterSetBoundsOp struct {
1261 : iterID objID
1262 : lower []byte
1263 : upper []byte
1264 : }
1265 :
1266 1 : func (o *iterSetBoundsOp) run(t *Test, h historyRecorder) {
1267 1 : i := t.getIter(o.iterID)
1268 1 : var lower, upper []byte
1269 1 : if o.lower != nil {
1270 1 : lower = append(lower, o.lower...)
1271 1 : }
1272 1 : if o.upper != nil {
1273 1 : upper = append(upper, o.upper...)
1274 1 : }
1275 1 : i.SetBounds(lower, upper)
1276 1 :
1277 1 : // Trash the bounds to ensure that Pebble doesn't rely on the stability of
1278 1 : // the user-provided bounds.
1279 1 : rand.Read(lower[:])
1280 1 : rand.Read(upper[:])
1281 1 :
1282 1 : h.Recordf("%s // %v", o, i.Error())
1283 : }
1284 :
1285 1 : func (o *iterSetBoundsOp) String() string {
1286 1 : return fmt.Sprintf("%s.SetBounds(%q, %q)", o.iterID, o.lower, o.upper)
1287 1 : }
1288 :
1289 1 : func (o *iterSetBoundsOp) receiver() objID { return o.iterID }
1290 1 : func (o *iterSetBoundsOp) syncObjs() objIDSlice { return nil }
1291 :
1292 0 : func (o *iterSetBoundsOp) keys() []*[]byte {
1293 0 : return []*[]byte{&o.lower, &o.upper}
1294 0 : }
1295 :
1296 0 : func (o *iterSetBoundsOp) diagramKeyRanges() []pebble.KeyRange {
1297 0 : return []pebble.KeyRange{{Start: o.lower, End: o.upper}}
1298 0 : }
1299 :
1300 : // iterSetOptionsOp models an Iterator.SetOptions operation.
1301 : type iterSetOptionsOp struct {
1302 : iterID objID
1303 : iterOpts
1304 :
1305 : // derivedReaderID is the ID of the underlying reader that backs the
1306 : // iterator. The derivedReaderID is NOT serialized by String and is derived
1307 : // from other operations during parse.
1308 : derivedReaderID objID
1309 : }
1310 :
1311 1 : func (o *iterSetOptionsOp) run(t *Test, h historyRecorder) {
1312 1 : i := t.getIter(o.iterID)
1313 1 :
1314 1 : opts := iterOptions(o.iterOpts)
1315 1 : if opts == nil {
1316 1 : opts = &pebble.IterOptions{}
1317 1 : }
1318 1 : i.SetOptions(opts)
1319 1 :
1320 1 : // Trash the bounds to ensure that Pebble doesn't rely on the stability of
1321 1 : // the user-provided bounds.
1322 1 : rand.Read(opts.LowerBound[:])
1323 1 : rand.Read(opts.UpperBound[:])
1324 1 :
1325 1 : h.Recordf("%s // %v", o, i.Error())
1326 : }
1327 :
1328 1 : func (o *iterSetOptionsOp) String() string {
1329 1 : return fmt.Sprintf("%s.SetOptions(%q, %q, %d /* key types */, %d, %d, %t /* use L6 filters */, %q /* masking suffix */)",
1330 1 : o.iterID, o.lower, o.upper, o.keyTypes, o.filterMin, o.filterMax, o.useL6Filters, o.maskSuffix)
1331 1 : }
1332 :
1333 1 : func iterOptions(o iterOpts) *pebble.IterOptions {
1334 1 : if o.IsZero() && !debugIterators {
1335 1 : return nil
1336 1 : }
1337 1 : var lower, upper []byte
1338 1 : if o.lower != nil {
1339 1 : lower = append(lower, o.lower...)
1340 1 : }
1341 1 : if o.upper != nil {
1342 1 : upper = append(upper, o.upper...)
1343 1 : }
1344 1 : opts := &pebble.IterOptions{
1345 1 : LowerBound: lower,
1346 1 : UpperBound: upper,
1347 1 : KeyTypes: pebble.IterKeyType(o.keyTypes),
1348 1 : RangeKeyMasking: pebble.RangeKeyMasking{
1349 1 : Suffix: o.maskSuffix,
1350 1 : },
1351 1 : UseL6Filters: o.useL6Filters,
1352 1 : DebugRangeKeyStack: debugIterators,
1353 1 : }
1354 1 : if opts.RangeKeyMasking.Suffix != nil {
1355 1 : opts.RangeKeyMasking.Filter = func() pebble.BlockPropertyFilterMask {
1356 1 : return sstable.NewTestKeysMaskingFilter()
1357 1 : }
1358 : }
1359 1 : if o.filterMax > 0 {
1360 1 : opts.PointKeyFilters = []pebble.BlockPropertyFilter{
1361 1 : sstable.NewTestKeysBlockPropertyFilter(o.filterMin, o.filterMax),
1362 1 : }
1363 1 : // Enforce the timestamp bounds in SkipPoint, so that the iterator never
1364 1 : // returns a key outside the filterMin, filterMax bounds. This provides
1365 1 : // deterministic iteration.
1366 1 : opts.SkipPoint = func(k []byte) (skip bool) {
1367 1 : n := testkeys.Comparer.Split(k)
1368 1 : if n == len(k) {
1369 1 : // No suffix, don't skip it.
1370 1 : return false
1371 1 : }
1372 1 : v, err := testkeys.ParseSuffix(k[n:])
1373 1 : if err != nil {
1374 0 : panic(err)
1375 : }
1376 1 : ts := uint64(v)
1377 1 : return ts < o.filterMin || ts >= o.filterMax
1378 : }
1379 : }
1380 1 : return opts
1381 : }
1382 :
1383 1 : func (o *iterSetOptionsOp) receiver() objID { return o.iterID }
1384 :
1385 1 : func (o *iterSetOptionsOp) syncObjs() objIDSlice {
1386 1 : if o.derivedReaderID.tag() == batchTag {
1387 0 : // If the underlying reader is a batch, we must synchronize with the
1388 0 : // batch so that we observe all the mutations up until this operation
1389 0 : // and no more.
1390 0 : return []objID{o.derivedReaderID}
1391 0 : }
1392 1 : return nil
1393 : }
1394 :
1395 0 : func (o *iterSetOptionsOp) keys() []*[]byte { return nil }
1396 0 : func (o *iterSetOptionsOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1397 :
1398 : // iterSeekGEOp models an Iterator.SeekGE[WithLimit] operation.
1399 : type iterSeekGEOp struct {
1400 : iterID objID
1401 : key []byte
1402 : limit []byte
1403 :
1404 : derivedReaderID objID
1405 : }
1406 :
1407 1 : func iteratorPos(i *retryableIter) string {
1408 1 : var buf bytes.Buffer
1409 1 : fmt.Fprintf(&buf, "%q", i.Key())
1410 1 : hasPoint, hasRange := i.HasPointAndRange()
1411 1 : if hasPoint {
1412 1 : fmt.Fprintf(&buf, ",%q", i.Value())
1413 1 : } else {
1414 1 : fmt.Fprint(&buf, ",<no point>")
1415 1 : }
1416 1 : if hasRange {
1417 1 : start, end := i.RangeBounds()
1418 1 : fmt.Fprintf(&buf, ",[%q,%q)=>{", start, end)
1419 1 : for i, rk := range i.RangeKeys() {
1420 1 : if i > 0 {
1421 1 : fmt.Fprint(&buf, ",")
1422 1 : }
1423 1 : fmt.Fprintf(&buf, "%q=%q", rk.Suffix, rk.Value)
1424 : }
1425 1 : fmt.Fprint(&buf, "}")
1426 1 : } else {
1427 1 : fmt.Fprint(&buf, ",<no range>")
1428 1 : }
1429 1 : if i.RangeKeyChanged() {
1430 1 : fmt.Fprint(&buf, "*")
1431 1 : }
1432 1 : return buf.String()
1433 : }
1434 :
// validBoolToStr returns "true" or "false" for the given validity flag.
func validBoolToStr(valid bool) string {
	return fmt.Sprint(valid)
}
1438 :
1439 1 : func validityStateToStr(validity pebble.IterValidityState) (bool, string) {
1440 1 : // We can't distinguish between IterExhausted and IterAtLimit in a
1441 1 : // deterministic manner.
1442 1 : switch validity {
1443 1 : case pebble.IterExhausted, pebble.IterAtLimit:
1444 1 : return false, "invalid"
1445 1 : case pebble.IterValid:
1446 1 : return true, "valid"
1447 0 : default:
1448 0 : panic("unknown validity")
1449 : }
1450 : }
1451 :
1452 1 : func (o *iterSeekGEOp) run(t *Test, h historyRecorder) {
1453 1 : i := t.getIter(o.iterID)
1454 1 : var valid bool
1455 1 : var validStr string
1456 1 : if o.limit == nil {
1457 1 : valid = i.SeekGE(o.key)
1458 1 : validStr = validBoolToStr(valid)
1459 1 : } else {
1460 1 : valid, validStr = validityStateToStr(i.SeekGEWithLimit(o.key, o.limit))
1461 1 : }
1462 1 : if valid {
1463 1 : h.Recordf("%s // [%s,%s] %v", o, validStr, iteratorPos(i), i.Error())
1464 1 : } else {
1465 1 : h.Recordf("%s // [%s] %v", o, validStr, i.Error())
1466 1 : }
1467 : }
1468 :
1469 1 : func (o *iterSeekGEOp) String() string {
1470 1 : return fmt.Sprintf("%s.SeekGE(%q, %q)", o.iterID, o.key, o.limit)
1471 1 : }
1472 1 : func (o *iterSeekGEOp) receiver() objID { return o.iterID }
1473 1 : func (o *iterSeekGEOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1474 :
1475 0 : func (o *iterSeekGEOp) keys() []*[]byte {
1476 0 : return []*[]byte{&o.key}
1477 0 : }
1478 :
1479 1 : func (o *iterSeekGEOp) diagramKeyRanges() []pebble.KeyRange {
1480 1 : return []pebble.KeyRange{{Start: o.key, End: o.key}}
1481 1 : }
1482 :
1483 1 : func onlyBatchIDs(ids ...objID) objIDSlice {
1484 1 : var ret objIDSlice
1485 1 : for _, id := range ids {
1486 1 : if id.tag() == batchTag {
1487 0 : ret = append(ret, id)
1488 0 : }
1489 : }
1490 1 : return ret
1491 : }
1492 :
1493 : // iterSeekPrefixGEOp models an Iterator.SeekPrefixGE operation.
1494 : type iterSeekPrefixGEOp struct {
1495 : iterID objID
1496 : key []byte
1497 :
1498 : derivedReaderID objID
1499 : }
1500 :
1501 1 : func (o *iterSeekPrefixGEOp) run(t *Test, h historyRecorder) {
1502 1 : i := t.getIter(o.iterID)
1503 1 : valid := i.SeekPrefixGE(o.key)
1504 1 : if valid {
1505 1 : h.Recordf("%s // [%t,%s] %v", o, valid, iteratorPos(i), i.Error())
1506 1 : } else {
1507 1 : h.Recordf("%s // [%t] %v", o, valid, i.Error())
1508 1 : }
1509 : }
1510 :
1511 1 : func (o *iterSeekPrefixGEOp) String() string {
1512 1 : return fmt.Sprintf("%s.SeekPrefixGE(%q)", o.iterID, o.key)
1513 1 : }
1514 1 : func (o *iterSeekPrefixGEOp) receiver() objID { return o.iterID }
1515 1 : func (o *iterSeekPrefixGEOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1516 :
1517 0 : func (o *iterSeekPrefixGEOp) keys() []*[]byte {
1518 0 : return []*[]byte{&o.key}
1519 0 : }
1520 :
1521 0 : func (o *iterSeekPrefixGEOp) diagramKeyRanges() []pebble.KeyRange {
1522 0 : return []pebble.KeyRange{{Start: o.key, End: o.key}}
1523 0 : }
1524 :
1525 : // iterSeekLTOp models an Iterator.SeekLT[WithLimit] operation.
1526 : type iterSeekLTOp struct {
1527 : iterID objID
1528 : key []byte
1529 : limit []byte
1530 :
1531 : derivedReaderID objID
1532 : }
1533 :
1534 1 : func (o *iterSeekLTOp) run(t *Test, h historyRecorder) {
1535 1 : i := t.getIter(o.iterID)
1536 1 : var valid bool
1537 1 : var validStr string
1538 1 : if o.limit == nil {
1539 1 : valid = i.SeekLT(o.key)
1540 1 : validStr = validBoolToStr(valid)
1541 1 : } else {
1542 1 : valid, validStr = validityStateToStr(i.SeekLTWithLimit(o.key, o.limit))
1543 1 : }
1544 1 : if valid {
1545 1 : h.Recordf("%s // [%s,%s] %v", o, validStr, iteratorPos(i), i.Error())
1546 1 : } else {
1547 1 : h.Recordf("%s // [%s] %v", o, validStr, i.Error())
1548 1 : }
1549 : }
1550 :
1551 1 : func (o *iterSeekLTOp) String() string {
1552 1 : return fmt.Sprintf("%s.SeekLT(%q, %q)", o.iterID, o.key, o.limit)
1553 1 : }
1554 :
1555 1 : func (o *iterSeekLTOp) receiver() objID { return o.iterID }
1556 1 : func (o *iterSeekLTOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1557 :
1558 0 : func (o *iterSeekLTOp) keys() []*[]byte {
1559 0 : return []*[]byte{&o.key}
1560 0 : }
1561 :
1562 0 : func (o *iterSeekLTOp) diagramKeyRanges() []pebble.KeyRange {
1563 0 : return []pebble.KeyRange{{Start: o.key, End: o.key}}
1564 0 : }
1565 :
1566 : // iterFirstOp models an Iterator.First operation.
1567 : type iterFirstOp struct {
1568 : iterID objID
1569 :
1570 : derivedReaderID objID
1571 : }
1572 :
1573 1 : func (o *iterFirstOp) run(t *Test, h historyRecorder) {
1574 1 : i := t.getIter(o.iterID)
1575 1 : valid := i.First()
1576 1 : if valid {
1577 1 : h.Recordf("%s // [%t,%s] %v", o, valid, iteratorPos(i), i.Error())
1578 1 : } else {
1579 1 : h.Recordf("%s // [%t] %v", o, valid, i.Error())
1580 1 : }
1581 : }
1582 :
1583 1 : func (o *iterFirstOp) String() string { return fmt.Sprintf("%s.First()", o.iterID) }
1584 1 : func (o *iterFirstOp) receiver() objID { return o.iterID }
1585 1 : func (o *iterFirstOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1586 :
1587 0 : func (o *iterFirstOp) keys() []*[]byte { return nil }
1588 0 : func (o *iterFirstOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1589 :
1590 : // iterLastOp models an Iterator.Last operation.
1591 : type iterLastOp struct {
1592 : iterID objID
1593 :
1594 : derivedReaderID objID
1595 : }
1596 :
1597 1 : func (o *iterLastOp) run(t *Test, h historyRecorder) {
1598 1 : i := t.getIter(o.iterID)
1599 1 : valid := i.Last()
1600 1 : if valid {
1601 1 : h.Recordf("%s // [%t,%s] %v", o, valid, iteratorPos(i), i.Error())
1602 1 : } else {
1603 1 : h.Recordf("%s // [%t] %v", o, valid, i.Error())
1604 1 : }
1605 : }
1606 :
1607 1 : func (o *iterLastOp) String() string { return fmt.Sprintf("%s.Last()", o.iterID) }
1608 1 : func (o *iterLastOp) receiver() objID { return o.iterID }
1609 1 : func (o *iterLastOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1610 :
1611 0 : func (o *iterLastOp) keys() []*[]byte { return nil }
1612 0 : func (o *iterLastOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1613 :
1614 : // iterNextOp models an Iterator.Next[WithLimit] operation.
1615 : type iterNextOp struct {
1616 : iterID objID
1617 : limit []byte
1618 :
1619 : derivedReaderID objID
1620 : }
1621 :
1622 1 : func (o *iterNextOp) run(t *Test, h historyRecorder) {
1623 1 : i := t.getIter(o.iterID)
1624 1 : var valid bool
1625 1 : var validStr string
1626 1 : if o.limit == nil {
1627 1 : valid = i.Next()
1628 1 : validStr = validBoolToStr(valid)
1629 1 : } else {
1630 1 : valid, validStr = validityStateToStr(i.NextWithLimit(o.limit))
1631 1 : }
1632 1 : if valid {
1633 1 : h.Recordf("%s // [%s,%s] %v", o, validStr, iteratorPos(i), i.Error())
1634 1 : } else {
1635 1 : h.Recordf("%s // [%s] %v", o, validStr, i.Error())
1636 1 : }
1637 : }
1638 :
1639 1 : func (o *iterNextOp) String() string { return fmt.Sprintf("%s.Next(%q)", o.iterID, o.limit) }
1640 1 : func (o *iterNextOp) receiver() objID { return o.iterID }
1641 1 : func (o *iterNextOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1642 :
1643 0 : func (o *iterNextOp) keys() []*[]byte { return nil }
1644 0 : func (o *iterNextOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1645 :
1646 : // iterNextPrefixOp models an Iterator.NextPrefix operation.
1647 : type iterNextPrefixOp struct {
1648 : iterID objID
1649 :
1650 : derivedReaderID objID
1651 : }
1652 :
1653 1 : func (o *iterNextPrefixOp) run(t *Test, h historyRecorder) {
1654 1 : i := t.getIter(o.iterID)
1655 1 : valid := i.NextPrefix()
1656 1 : validStr := validBoolToStr(valid)
1657 1 : if valid {
1658 1 : h.Recordf("%s // [%s,%s] %v", o, validStr, iteratorPos(i), i.Error())
1659 1 : } else {
1660 1 : h.Recordf("%s // [%s] %v", o, validStr, i.Error())
1661 1 : }
1662 : }
1663 :
1664 1 : func (o *iterNextPrefixOp) String() string { return fmt.Sprintf("%s.NextPrefix()", o.iterID) }
1665 1 : func (o *iterNextPrefixOp) receiver() objID { return o.iterID }
1666 1 : func (o *iterNextPrefixOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1667 :
1668 0 : func (o *iterNextPrefixOp) keys() []*[]byte { return nil }
1669 0 : func (o *iterNextPrefixOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1670 :
1671 : // iterCanSingleDelOp models a call to CanDeterministicallySingleDelete with an
1672 : // Iterator.
1673 : type iterCanSingleDelOp struct {
1674 : iterID objID
1675 :
1676 : derivedReaderID objID
1677 : }
1678 :
1679 1 : func (o *iterCanSingleDelOp) run(t *Test, h historyRecorder) {
1680 1 : // TODO(jackson): When we perform error injection, we'll need to rethink
1681 1 : // this.
1682 1 : _, err := pebble.CanDeterministicallySingleDelete(t.getIter(o.iterID).iter)
1683 1 : // The return value of CanDeterministicallySingleDelete is dependent on
1684 1 : // internal LSM state and non-deterministic, so we don't record it.
1685 1 : // Including the operation within the metamorphic test at all helps ensure
1686 1 : // that it does not change the result of any other Iterator operation that
1687 1 : // should be deterministic, regardless of its own outcome.
1688 1 : //
1689 1 : // We still record the value of the error because it's deterministic, at
1690 1 : // least for now. The possible error cases are:
1691 1 : // - The iterator was already in an error state when the operation ran.
1692 1 : // - The operation is deterministically invalid (like using an InternalNext
1693 1 : // to change directions.)
1694 1 : h.Recordf("%s // %v", o, err)
1695 1 : }
1696 :
1697 1 : func (o *iterCanSingleDelOp) String() string { return fmt.Sprintf("%s.InternalNext()", o.iterID) }
1698 1 : func (o *iterCanSingleDelOp) receiver() objID { return o.iterID }
1699 1 : func (o *iterCanSingleDelOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1700 :
1701 0 : func (o *iterCanSingleDelOp) keys() []*[]byte { return nil }
1702 0 : func (o *iterCanSingleDelOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1703 :
1704 : // iterPrevOp models an Iterator.Prev[WithLimit] operation.
1705 : type iterPrevOp struct {
1706 : iterID objID
1707 : limit []byte
1708 :
1709 : derivedReaderID objID
1710 : }
1711 :
1712 1 : func (o *iterPrevOp) run(t *Test, h historyRecorder) {
1713 1 : i := t.getIter(o.iterID)
1714 1 : var valid bool
1715 1 : var validStr string
1716 1 : if o.limit == nil {
1717 1 : valid = i.Prev()
1718 1 : validStr = validBoolToStr(valid)
1719 1 : } else {
1720 1 : valid, validStr = validityStateToStr(i.PrevWithLimit(o.limit))
1721 1 : }
1722 1 : if valid {
1723 1 : h.Recordf("%s // [%s,%s] %v", o, validStr, iteratorPos(i), i.Error())
1724 1 : } else {
1725 1 : h.Recordf("%s // [%s] %v", o, validStr, i.Error())
1726 1 : }
1727 : }
1728 :
1729 1 : func (o *iterPrevOp) String() string { return fmt.Sprintf("%s.Prev(%q)", o.iterID, o.limit) }
1730 1 : func (o *iterPrevOp) receiver() objID { return o.iterID }
1731 1 : func (o *iterPrevOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
1732 :
1733 0 : func (o *iterPrevOp) keys() []*[]byte { return nil }
1734 0 : func (o *iterPrevOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1735 :
1736 : // newSnapshotOp models a DB.NewSnapshot operation.
1737 : type newSnapshotOp struct {
1738 : dbID objID
1739 : snapID objID
1740 : // If nonempty, this snapshot must not be used to read any keys outside of
1741 : // the provided bounds. This allows some implementations to use 'Eventually
1742 : // file-only snapshots,' which require bounds.
1743 : bounds []pebble.KeyRange
1744 : }
1745 :
1746 1 : func (o *newSnapshotOp) run(t *Test, h historyRecorder) {
1747 1 : bounds := o.bounds
1748 1 : if len(bounds) == 0 {
1749 0 : panic("bounds unexpectedly unset for newSnapshotOp")
1750 : }
1751 : // Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
1752 1 : createEfos := ((11400714819323198485 * uint64(t.idx) * t.testOpts.seedEFOS) >> 63) == 1
1753 1 : // If either of these options is true, an EFOS _must_ be created, regardless
1754 1 : // of what the fibonacci hash returned.
1755 1 : excisePossible := t.testOpts.useSharedReplicate || t.testOpts.useExternalReplicate || t.testOpts.useExcise
1756 1 : if createEfos || excisePossible {
1757 1 : s := t.getDB(o.dbID).NewEventuallyFileOnlySnapshot(bounds)
1758 1 : t.setSnapshot(o.snapID, s)
1759 1 : } else {
1760 1 : s := t.getDB(o.dbID).NewSnapshot()
1761 1 : t.setSnapshot(o.snapID, s)
1762 1 : }
1763 1 : h.Recordf("%s", o)
1764 : }
1765 :
1766 1 : func (o *newSnapshotOp) String() string {
1767 1 : var buf bytes.Buffer
1768 1 : fmt.Fprintf(&buf, "%s = %s.NewSnapshot(", o.snapID, o.dbID)
1769 1 : for i := range o.bounds {
1770 1 : if i > 0 {
1771 1 : fmt.Fprint(&buf, ", ")
1772 1 : }
1773 1 : fmt.Fprintf(&buf, "%q, %q", o.bounds[i].Start, o.bounds[i].End)
1774 : }
1775 1 : fmt.Fprint(&buf, ")")
1776 1 : return buf.String()
1777 : }
1778 1 : func (o *newSnapshotOp) receiver() objID { return o.dbID }
1779 1 : func (o *newSnapshotOp) syncObjs() objIDSlice { return []objID{o.snapID} }
1780 :
1781 1 : func (o *newSnapshotOp) keys() []*[]byte {
1782 1 : var res []*[]byte
1783 1 : for i := range o.bounds {
1784 1 : res = append(res, &o.bounds[i].Start, &o.bounds[i].End)
1785 1 : }
1786 1 : return res
1787 : }
1788 :
1789 1 : func (o *newSnapshotOp) diagramKeyRanges() []pebble.KeyRange {
1790 1 : return o.bounds
1791 1 : }
1792 :
1793 : // newExternalObjOp models a DB.NewExternalObj operation.
1794 : type newExternalObjOp struct {
1795 : batchID objID
1796 : externalObjID objID
1797 : }
1798 :
1799 1 : func externalObjName(externalObjID objID) string {
1800 1 : if externalObjID.tag() != externalObjTag {
1801 0 : panic(fmt.Sprintf("invalid externalObjID %s", externalObjID))
1802 : }
1803 1 : return fmt.Sprintf("external-for-ingest-%d.sst", externalObjID.slot())
1804 : }
1805 :
1806 1 : func (o *newExternalObjOp) run(t *Test, h historyRecorder) {
1807 1 : b := t.getBatch(o.batchID)
1808 1 : t.clearObj(o.batchID)
1809 1 :
1810 1 : writeCloser, err := t.externalStorage.CreateObject(externalObjName(o.externalObjID))
1811 1 : if err != nil {
1812 0 : panic(err)
1813 : }
1814 1 : writable := objstorageprovider.NewRemoteWritable(writeCloser)
1815 1 :
1816 1 : iter, rangeDelIter, rangeKeyIter := private.BatchSort(b)
1817 1 :
1818 1 : sstMeta, err := writeSSTForIngestion(
1819 1 : t,
1820 1 : iter, rangeDelIter, rangeKeyIter,
1821 1 : true, /* uniquePrefixes */
1822 1 : nil, /* syntheticSuffix */
1823 1 : nil, /* syntheticPrefix */
1824 1 : writable,
1825 1 : t.minFMV(),
1826 1 : )
1827 1 : if err != nil {
1828 0 : panic(err)
1829 : }
1830 1 : if sstMeta.HasRangeKeys {
1831 0 : // #3287: IngestExternalFiles currently doesn't support range keys; we check
1832 0 : // for range keys in newExternalObj.
1833 0 : panic("external object has range keys")
1834 : }
1835 1 : t.setExternalObj(o.externalObjID, externalObjMeta{
1836 1 : sstMeta: sstMeta,
1837 1 : })
1838 1 : h.Recordf("%s", o)
1839 : }
1840 :
1841 1 : func (o *newExternalObjOp) String() string {
1842 1 : return fmt.Sprintf("%s = %s.NewExternalObj()", o.externalObjID, o.batchID)
1843 1 : }
1844 1 : func (o *newExternalObjOp) receiver() objID { return o.batchID }
1845 1 : func (o *newExternalObjOp) syncObjs() objIDSlice { return []objID{o.externalObjID} }
1846 :
1847 0 : func (o *newExternalObjOp) keys() []*[]byte { return nil }
1848 0 : func (o *newExternalObjOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1849 :
1850 : type dbRatchetFormatMajorVersionOp struct {
1851 : dbID objID
1852 : vers pebble.FormatMajorVersion
1853 : }
1854 :
1855 1 : func (o *dbRatchetFormatMajorVersionOp) run(t *Test, h historyRecorder) {
1856 1 : var err error
1857 1 : // NB: We no-op the operation if we're already at or above the provided
1858 1 : // format major version. Different runs start at different format major
1859 1 : // versions, making the presence of an error and the error message itself
1860 1 : // non-deterministic if we attempt to upgrade to an older version.
1861 1 : //
1862 1 : //Regardless, subsequent operations should behave identically, which is what
1863 1 : //we're really aiming to test by including this format major version ratchet
1864 1 : //operation.
1865 1 : if t.getDB(o.dbID).FormatMajorVersion() < o.vers {
1866 1 : err = t.getDB(o.dbID).RatchetFormatMajorVersion(o.vers)
1867 1 : }
1868 1 : h.Recordf("%s // %v", o, err)
1869 : }
1870 :
1871 1 : func (o *dbRatchetFormatMajorVersionOp) String() string {
1872 1 : return fmt.Sprintf("%s.RatchetFormatMajorVersion(%s)", o.dbID, o.vers)
1873 1 : }
1874 1 : func (o *dbRatchetFormatMajorVersionOp) receiver() objID { return o.dbID }
1875 1 : func (o *dbRatchetFormatMajorVersionOp) syncObjs() objIDSlice { return nil }
1876 :
1877 0 : func (o *dbRatchetFormatMajorVersionOp) keys() []*[]byte { return nil }
1878 0 : func (o *dbRatchetFormatMajorVersionOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1879 :
1880 : type dbRestartOp struct {
1881 : dbID objID
1882 :
1883 : // affectedObjects is the list of additional objects that are affected by this
1884 : // operation, and which syncObjs() must return so that we don't perform the
1885 : // restart in parallel with other operations to affected objects.
1886 : affectedObjects []objID
1887 : }
1888 :
1889 1 : func (o *dbRestartOp) run(t *Test, h historyRecorder) {
1890 1 : if err := t.restartDB(o.dbID); err != nil {
1891 0 : h.Recordf("%s // %v", o, err)
1892 0 : h.history.err.Store(errors.Wrap(err, "dbRestartOp"))
1893 1 : } else {
1894 1 : h.Recordf("%s", o)
1895 1 : }
1896 : }
1897 :
1898 1 : func (o *dbRestartOp) String() string { return fmt.Sprintf("%s.Restart()", o.dbID) }
1899 1 : func (o *dbRestartOp) receiver() objID { return o.dbID }
1900 1 : func (o *dbRestartOp) syncObjs() objIDSlice { return o.affectedObjects }
1901 :
1902 0 : func (o *dbRestartOp) keys() []*[]byte { return nil }
1903 0 : func (o *dbRestartOp) diagramKeyRanges() []pebble.KeyRange { return nil }
1904 :
1905 1 : func formatOps(ops []op) string {
1906 1 : var buf strings.Builder
1907 1 : for _, op := range ops {
1908 1 : fmt.Fprintf(&buf, "%s\n", op)
1909 1 : }
1910 1 : return buf.String()
1911 : }
1912 :
1913 : // replicateOp models an operation that could copy keys from one db to
1914 : // another through either an IngestAndExcise, or an Ingest.
1915 : type replicateOp struct {
1916 : source, dest objID
1917 : start, end []byte
1918 : }
1919 :
1920 : func (r *replicateOp) runSharedReplicate(
1921 : t *Test, h historyRecorder, source, dest *pebble.DB, w *sstable.Writer, sstPath string,
1922 0 : ) {
1923 0 : var sharedSSTs []pebble.SharedSSTMeta
1924 0 : var err error
1925 0 : err = source.ScanInternal(context.TODO(), sstable.CategoryAndQoS{}, r.start, r.end,
1926 0 : func(key *pebble.InternalKey, value pebble.LazyValue, _ pebble.IteratorLevel) error {
1927 0 : val, _, err := value.Value(nil)
1928 0 : if err != nil {
1929 0 : panic(err)
1930 : }
1931 0 : return w.Add(base.MakeInternalKey(key.UserKey, 0, key.Kind()), val)
1932 : },
1933 0 : func(start, end []byte, seqNum uint64) error {
1934 0 : return w.DeleteRange(start, end)
1935 0 : },
1936 0 : func(start, end []byte, keys []keyspan.Key) error {
1937 0 : s := keyspan.Span{
1938 0 : Start: start,
1939 0 : End: end,
1940 0 : Keys: keys,
1941 0 : }
1942 0 : return rangekey.Encode(&s, w.AddRangeKey)
1943 0 : },
1944 0 : func(sst *pebble.SharedSSTMeta) error {
1945 0 : sharedSSTs = append(sharedSSTs, *sst)
1946 0 : return nil
1947 0 : },
1948 : nil,
1949 : )
1950 0 : if err != nil {
1951 0 : h.Recordf("%s // %v", r, err)
1952 0 : return
1953 0 : }
1954 :
1955 0 : err = w.Close()
1956 0 : if err != nil {
1957 0 : h.Recordf("%s // %v", r, err)
1958 0 : return
1959 0 : }
1960 0 : meta, err := w.Metadata()
1961 0 : if err != nil {
1962 0 : h.Recordf("%s // %v", r, err)
1963 0 : return
1964 0 : }
1965 0 : if len(sharedSSTs) == 0 && meta.Properties.NumEntries == 0 && meta.Properties.NumRangeKeys() == 0 {
1966 0 : // IngestAndExcise below will be a no-op. We should do a
1967 0 : // DeleteRange+RangeKeyDel to mimic the behaviour of the non-shared-replicate
1968 0 : // case.
1969 0 : //
1970 0 : // TODO(bilal): Remove this when we support excises with no matching ingests.
1971 0 : if err := dest.RangeKeyDelete(r.start, r.end, t.writeOpts); err != nil {
1972 0 : h.Recordf("%s // %v", r, err)
1973 0 : return
1974 0 : }
1975 0 : err := dest.DeleteRange(r.start, r.end, t.writeOpts)
1976 0 : h.Recordf("%s // %v", r, err)
1977 0 : return
1978 : }
1979 :
1980 0 : _, err = dest.IngestAndExcise([]string{sstPath}, sharedSSTs, nil /* external */, pebble.KeyRange{Start: r.start, End: r.end}, false)
1981 0 : h.Recordf("%s // %v", r, err)
1982 : }
1983 :
1984 : func (r *replicateOp) runExternalReplicate(
1985 : t *Test, h historyRecorder, source, dest *pebble.DB, w *sstable.Writer, sstPath string,
1986 0 : ) {
1987 0 : var externalSSTs []pebble.ExternalFile
1988 0 : var err error
1989 0 : err = source.ScanInternal(context.TODO(), sstable.CategoryAndQoS{}, r.start, r.end,
1990 0 : func(key *pebble.InternalKey, value pebble.LazyValue, _ pebble.IteratorLevel) error {
1991 0 : val, _, err := value.Value(nil)
1992 0 : if err != nil {
1993 0 : panic(err)
1994 : }
1995 0 : return w.Add(base.MakeInternalKey(key.UserKey, 0, key.Kind()), val)
1996 : },
1997 0 : func(start, end []byte, seqNum uint64) error {
1998 0 : return w.DeleteRange(start, end)
1999 0 : },
2000 0 : func(start, end []byte, keys []keyspan.Key) error {
2001 0 : s := keyspan.Span{
2002 0 : Start: start,
2003 0 : End: end,
2004 0 : Keys: keys,
2005 0 : }
2006 0 : return rangekey.Encode(&s, w.AddRangeKey)
2007 0 : },
2008 : nil,
2009 0 : func(sst *pebble.ExternalFile) error {
2010 0 : externalSSTs = append(externalSSTs, *sst)
2011 0 : return nil
2012 0 : },
2013 : )
2014 0 : if err != nil {
2015 0 : h.Recordf("%s // %v", r, err)
2016 0 : return
2017 0 : }
2018 :
2019 0 : err = w.Close()
2020 0 : if err != nil {
2021 0 : h.Recordf("%s // %v", r, err)
2022 0 : return
2023 0 : }
2024 0 : meta, err := w.Metadata()
2025 0 : if err != nil {
2026 0 : h.Recordf("%s // %v", r, err)
2027 0 : return
2028 0 : }
2029 0 : if len(externalSSTs) == 0 && meta.Properties.NumEntries == 0 && meta.Properties.NumRangeKeys() == 0 {
2030 0 : // IngestAndExcise below will be a no-op. We should do a
2031 0 : // DeleteRange+RangeKeyDel to mimic the behaviour of the non-external-replicate
2032 0 : // case.
2033 0 : //
2034 0 : // TODO(bilal): Remove this when we support excises with no matching ingests.
2035 0 : if err := dest.RangeKeyDelete(r.start, r.end, t.writeOpts); err != nil {
2036 0 : h.Recordf("%s // %v", r, err)
2037 0 : return
2038 0 : }
2039 0 : err := dest.DeleteRange(r.start, r.end, t.writeOpts)
2040 0 : h.Recordf("%s // %v", r, err)
2041 0 : return
2042 : }
2043 :
2044 0 : _, err = dest.IngestAndExcise([]string{sstPath}, nil, externalSSTs /* external */, pebble.KeyRange{Start: r.start, End: r.end}, false /* sstContainsExciseTombstone */)
2045 0 : h.Recordf("%s // %v", r, err)
2046 : }
2047 :
2048 0 : func (r *replicateOp) run(t *Test, h historyRecorder) {
2049 0 : // Shared replication only works if shared storage is enabled.
2050 0 : useSharedIngest := t.testOpts.useSharedReplicate && t.testOpts.sharedStorageEnabled
2051 0 : useExternalIngest := t.testOpts.useExternalReplicate && t.testOpts.externalStorageEnabled
2052 0 :
2053 0 : source := t.getDB(r.source)
2054 0 : dest := t.getDB(r.dest)
2055 0 : sstPath := path.Join(t.tmpDir, fmt.Sprintf("ext-replicate%d.sst", t.idx))
2056 0 : f, err := t.opts.FS.Create(sstPath, vfs.WriteCategoryUnspecified)
2057 0 : if err != nil {
2058 0 : h.Recordf("%s // %v", r, err)
2059 0 : return
2060 0 : }
2061 0 : w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), t.opts.MakeWriterOptions(0, dest.FormatMajorVersion().MaxTableFormat()))
2062 0 :
2063 0 : // NB: In practice we'll either do shared replicate or external replicate,
2064 0 : // as ScanInternal does not support both. We arbitrarily choose to prioritize
2065 0 : // external replication if both are enabled, as those are likely to hit
2066 0 : // widespread usage first.
2067 0 : if useExternalIngest {
2068 0 : r.runExternalReplicate(t, h, source, dest, w, sstPath)
2069 0 : return
2070 0 : }
2071 0 : if useSharedIngest {
2072 0 : r.runSharedReplicate(t, h, source, dest, w, sstPath)
2073 0 : return
2074 0 : }
2075 :
2076 : // First, do a RangeKeyDelete and DeleteRange on the whole span.
2077 0 : if err := dest.RangeKeyDelete(r.start, r.end, t.writeOpts); err != nil {
2078 0 : h.Recordf("%s // %v", r, err)
2079 0 : return
2080 0 : }
2081 0 : if err := dest.DeleteRange(r.start, r.end, t.writeOpts); err != nil {
2082 0 : h.Recordf("%s // %v", r, err)
2083 0 : return
2084 0 : }
2085 0 : iter, err := source.NewIter(&pebble.IterOptions{
2086 0 : LowerBound: r.start,
2087 0 : UpperBound: r.end,
2088 0 : KeyTypes: pebble.IterKeyTypePointsAndRanges,
2089 0 : })
2090 0 : if err != nil {
2091 0 : panic(err)
2092 : }
2093 0 : defer iter.Close()
2094 0 :
2095 0 : for ok := iter.SeekGE(r.start); ok && iter.Error() == nil; ok = iter.Next() {
2096 0 : hasPoint, hasRange := iter.HasPointAndRange()
2097 0 : if hasPoint {
2098 0 : val, err := iter.ValueAndErr()
2099 0 : if err != nil {
2100 0 : panic(err)
2101 : }
2102 0 : if err := w.Set(iter.Key(), val); err != nil {
2103 0 : panic(err)
2104 : }
2105 : }
2106 0 : if hasRange && iter.RangeKeyChanged() {
2107 0 : rangeKeys := iter.RangeKeys()
2108 0 : rkStart, rkEnd := iter.RangeBounds()
2109 0 :
2110 0 : span := &keyspan.Span{Start: rkStart, End: rkEnd, Keys: make([]keyspan.Key, len(rangeKeys))}
2111 0 : for i := range rangeKeys {
2112 0 : span.Keys[i] = keyspan.Key{
2113 0 : Trailer: base.MakeTrailer(0, base.InternalKeyKindRangeKeySet),
2114 0 : Suffix: rangeKeys[i].Suffix,
2115 0 : Value: rangeKeys[i].Value,
2116 0 : }
2117 0 : }
2118 0 : keyspan.SortKeysByTrailer(&span.Keys)
2119 0 : if err := rangekey.Encode(span, w.AddRangeKey); err != nil {
2120 0 : panic(err)
2121 : }
2122 : }
2123 : }
2124 0 : if err := iter.Error(); err != nil {
2125 0 : h.Recordf("%s // %v", r, err)
2126 0 : return
2127 0 : }
2128 0 : if err := w.Close(); err != nil {
2129 0 : panic(err)
2130 : }
2131 :
2132 0 : err = dest.Ingest([]string{sstPath})
2133 0 : h.Recordf("%s // %v", r, err)
2134 : }
2135 :
2136 1 : func (r *replicateOp) String() string {
2137 1 : return fmt.Sprintf("%s.Replicate(%s, %q, %q)", r.source, r.dest, r.start, r.end)
2138 1 : }
2139 :
2140 0 : func (r *replicateOp) receiver() objID { return r.source }
2141 0 : func (r *replicateOp) syncObjs() objIDSlice { return objIDSlice{r.dest} }
2142 :
2143 0 : func (r *replicateOp) keys() []*[]byte {
2144 0 : return []*[]byte{&r.start, &r.end}
2145 0 : }
2146 :
2147 1 : func (r *replicateOp) diagramKeyRanges() []pebble.KeyRange {
2148 1 : return []pebble.KeyRange{{Start: r.start, End: r.end}}
2149 1 : }
|