Line data Source code
1 : // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package metamorphic
6 :
7 : import (
8 : "bytes"
9 : "fmt"
10 : "math"
11 : "math/rand/v2"
12 : "os"
13 : "slices"
14 :
15 : "github.com/cockroachdb/pebble"
16 : "github.com/cockroachdb/pebble/internal/randvar"
17 : "github.com/cockroachdb/pebble/sstable"
18 : )
19 :
20 : const maxValueSize = 20
21 :
22 : type iterOpts struct {
23 : lower []byte
24 : upper []byte
25 : keyTypes uint32 // pebble.IterKeyType
26 : // maskSuffix may be set if keyTypes is IterKeyTypePointsAndRanges to
27 : // configure IterOptions.RangeKeyMasking.Suffix.
28 : maskSuffix []byte
29 :
30 : // If filterMax is non-nil, this iterator will filter out any keys that have
31 : // suffixes that don't fall within the range [filterMin,filterMax).
32 : // Additionally, the iterator will be constructed with a block-property
33 : // filter that filters out blocks accordingly. Not all OPTIONS hook up the
34 : // corresponding block property collector, so block-filtering may still be
35 : // effectively disabled in some runs. The iterator operations themselves
36 : // however will always skip past any points that should be filtered to
37 : // ensure determinism.
38 : filterMin []byte
39 : filterMax []byte
40 :
41 : // See IterOptions.UseL6Filters.
42 : useL6Filters bool
43 :
44 : // NB: If adding or removing fields, ensure IsZero is in sync.
45 : }
46 :
47 1 : func (o iterOpts) IsZero() bool {
48 1 : return o.lower == nil && o.upper == nil && o.keyTypes == 0 &&
49 1 : o.maskSuffix == nil && o.filterMin == nil && o.filterMax == nil && !o.useL6Filters
50 1 : }
51 :
52 : // GenerateOps generates n random operations, drawing randomness from the
53 : // provided pseudorandom generator and using cfg to determine the distribution
54 : // of op types.
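 : //
 : // A minimal usage sketch (illustrative; someOpConfig stands in for any
 : // OpConfig value, which this file does not construct):
 : //
 : //	rng := rand.New(rand.NewPCG(0, 42))
 : //	ops := GenerateOps(rng, 10_000, someOpConfig)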
55 1 : func GenerateOps(rng *rand.Rand, n uint64, cfg OpConfig) Ops {
56 1 : // Generate a new set of random ops. These are typically written to
57 1 : // <dir>/ops and read by the child processes when performing a test run.
58 1 : return newGenerator(rng, cfg, newKeyManager(1 /* num instances */)).generate(n)
59 1 : }
60 :
61 : type generator struct {
62 : cfg OpConfig
63 : rng *rand.Rand
64 :
65 : init *initOp
66 : ops []op
67 :
68 : // keyManager tracks the state of keys at operation generation time.
69 : keyManager *keyManager
70 : keyGenerator KeyGenerator
71 : dbs objIDSlice
72 : // Unordered sets of object IDs for live objects. Used to randomly select an
73 : // object when generating an operation. There are 4 concrete object types:
74 : // DBs (one per instance), batches, iterators, and snapshots.
75 : //
76 : // liveBatches contains the live indexed and write-only batches.
77 : liveBatches objIDSlice
78 : // liveIters contains the live iterators.
79 : liveIters objIDSlice
80 : itersLastOpts map[objID]iterOpts
81 : // liveReaders contains the DBs, and any live indexed batches and
82 : // snapshots. The DBs are always at the start of the slice.
83 : liveReaders objIDSlice
84 : // liveSnapshots contains the live snapshots.
85 : liveSnapshots objIDSlice
86 : // liveWriters contains the DBs, and any live batches. The DBs are always at the start of the slice.
87 : liveWriters objIDSlice
88 : // externalObjects contains the external objects created.
89 : externalObjects objIDSlice
90 :
91 : // Maps used to find associated objects during generation. These maps are not
92 : // needed during test execution.
93 : //
94 : // batchID -> batch iters: used to keep track of the open iterators on an
95 : // indexed batch. The iter set value will also be indexed by the readers map.
96 : batches map[objID]objIDSet
97 : // iterID -> reader iters: used to keep track of all of the open
98 : // iterators. The iter set value will also be indexed by either the batches
99 : // or snapshots maps.
100 : iters map[objID]objIDSet
101 : // objectID -> db: used to keep track of the DB a batch, iter, or snapshot
102 : // was created on. It should be read through the dbIDForObj method.
103 : objDB map[objID]objID
104 : // readerID -> reader iters: used to keep track of the open iterators on a
105 : // reader. The iter set value will also be indexed by either the batches or
106 : // snapshots maps. This map is the union of batches and snapshots maps.
107 : readers map[objID]objIDSet
108 : // snapshotID -> snapshot iters: used to keep track of the open iterators on
109 : // a snapshot. The iter set value will also be indexed by the readers map.
110 : snapshots map[objID]objIDSet
111 : // snapshotID -> bounds of the snapshot: only populated for snapshots that
112 : // are constrained by bounds.
113 : snapshotBounds map[objID][]pebble.KeyRange
114 : // iterVisibleKeys maps an iterator to the sorted set of keys that were
115 : // visible to its reader when the iterator was created.
116 : iterVisibleKeys map[objID][][]byte
117 : // iterReaderID is a map from an iterID to a readerID.
118 : iterReaderID map[objID]objID
119 : }
120 :
121 1 : func newGenerator(rng *rand.Rand, cfg OpConfig, km *keyManager) *generator {
122 1 : keyGenerator := km.kf.NewGenerator(km, rng, cfg)
123 1 : g := &generator{
124 1 : cfg: cfg,
125 1 : rng: rng,
126 1 : init: &initOp{dbSlots: uint32(cfg.numInstances)},
127 1 : keyManager: km,
128 1 : keyGenerator: keyGenerator,
129 1 : liveReaders: objIDSlice{makeObjID(dbTag, 1)},
130 1 : liveWriters: objIDSlice{makeObjID(dbTag, 1)},
131 1 : dbs: objIDSlice{makeObjID(dbTag, 1)},
132 1 : objDB: make(map[objID]objID),
133 1 : batches: make(map[objID]objIDSet),
134 1 : iters: make(map[objID]objIDSet),
135 1 : readers: make(map[objID]objIDSet),
136 1 : snapshots: make(map[objID]objIDSet),
137 1 : snapshotBounds: make(map[objID][]pebble.KeyRange),
138 1 : itersLastOpts: make(map[objID]iterOpts),
139 1 : iterVisibleKeys: make(map[objID][][]byte),
140 1 : iterReaderID: make(map[objID]objID),
141 1 : }
142 1 : for i := 1; i < cfg.numInstances; i++ {
143 1 : g.liveReaders = append(g.liveReaders, makeObjID(dbTag, uint32(i+1)))
144 1 : g.liveWriters = append(g.liveWriters, makeObjID(dbTag, uint32(i+1)))
145 1 : g.dbs = append(g.dbs, makeObjID(dbTag, uint32(i+1)))
146 1 : }
147 : // Note that the initOp fields are populated during generation.
148 1 : g.ops = append(g.ops, g.init)
149 1 : return g
150 : }
151 :
152 : // generate generates [count] operations according to the generator's configured
153 : // distributions.
154 1 : func (g *generator) generate(count uint64) []op {
155 1 : opGenerators := []func(){
156 1 : OpBatchAbort: g.batchAbort,
157 1 : OpBatchCommit: g.batchCommit,
158 1 : OpDBCheckpoint: g.dbCheckpoint,
159 1 : OpDBCompact: g.dbCompact,
160 1 : OpDBDownload: g.dbDownload,
161 1 : OpDBFlush: g.dbFlush,
162 1 : OpDBRatchetFormatMajorVersion: g.dbRatchetFormatMajorVersion,
163 1 : OpDBRestart: g.dbRestart,
164 1 : OpIterClose: g.randIter(g.iterClose),
165 1 : OpIterFirst: g.randIter(g.iterFirst),
166 1 : OpIterLast: g.randIter(g.iterLast),
167 1 : OpIterNext: g.randIter(g.iterNext),
168 1 : OpIterNextWithLimit: g.randIter(g.iterNextWithLimit),
169 1 : OpIterNextPrefix: g.randIter(g.iterNextPrefix),
170 1 : OpIterCanSingleDelete: g.randIter(g.iterCanSingleDelete),
171 1 : OpIterPrev: g.randIter(g.iterPrev),
172 1 : OpIterPrevWithLimit: g.randIter(g.iterPrevWithLimit),
173 1 : OpIterSeekGE: g.randIter(g.iterSeekGE),
174 1 : OpIterSeekGEWithLimit: g.randIter(g.iterSeekGEWithLimit),
175 1 : OpIterSeekLT: g.randIter(g.iterSeekLT),
176 1 : OpIterSeekLTWithLimit: g.randIter(g.iterSeekLTWithLimit),
177 1 : OpIterSeekPrefixGE: g.randIter(g.iterSeekPrefixGE),
178 1 : OpIterSetBounds: g.randIter(g.iterSetBounds),
179 1 : OpIterSetOptions: g.randIter(g.iterSetOptions),
180 1 : OpNewBatch: g.newBatch,
181 1 : OpNewIndexedBatch: g.newIndexedBatch,
182 1 : OpNewIter: g.newIter,
183 1 : OpNewIterUsingClone: g.newIterUsingClone,
184 1 : OpNewSnapshot: g.newSnapshot,
185 1 : OpNewExternalObj: g.newExternalObj,
186 1 : OpReaderGet: g.readerGet,
187 1 : OpReplicate: g.replicate,
188 1 : OpSnapshotClose: g.snapshotClose,
189 1 : OpWriterApply: g.writerApply,
190 1 : OpWriterDelete: g.writerDelete,
191 1 : OpWriterDeleteRange: g.writerDeleteRange,
192 1 : OpWriterIngest: g.writerIngest,
193 1 : OpWriterIngestAndExcise: g.writerIngestAndExcise,
194 1 : OpWriterIngestExternalFiles: g.writerIngestExternalFiles,
195 1 : OpWriterLogData: g.writerLogData,
196 1 : OpWriterMerge: g.writerMerge,
197 1 : OpWriterRangeKeyDelete: g.writerRangeKeyDelete,
198 1 : OpWriterRangeKeySet: g.writerRangeKeySet,
199 1 : OpWriterRangeKeyUnset: g.writerRangeKeyUnset,
200 1 : OpWriterSet: g.writerSet,
201 1 : OpWriterSingleDelete: g.writerSingleDelete,
202 1 : }
203 1 :
204 1 : // TPCC-style deck of cards randomization. Every time the end of the deck is
205 1 : // reached, we shuffle the deck.
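 : // (A sketch of the technique, assuming the usual deck semantics: op type i
 : // is dealt g.cfg.ops[i] cards and deck.Int deals the next card's index, so
 : // op frequencies match the configured weights exactly over each pass
 : // through the deck, with less variance than independent sampling.)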
206 1 : deck := randvar.NewDeck(g.rng, g.cfg.ops[:]...)
207 1 :
208 1 : defer func() {
209 1 : if r := recover(); r != nil {
210 0 : fmt.Fprintln(os.Stderr, formatOps(g.keyManager.kf, g.ops))
211 0 : panic(r)
212 : }
213 : }()
214 1 : for i := uint64(0); i < count; i++ {
215 1 : opGenerators[deck.Int()]()
216 1 : }
217 :
218 1 : g.dbClose()
219 1 :
220 1 : computeDerivedFields(g.ops)
221 1 : return g.ops
222 : }
223 :
224 1 : func (g *generator) add(op op) {
225 1 : g.ops = append(g.ops, op)
226 1 : g.keyManager.update(op)
227 1 : }
228 :
229 : // prefixKeyRange generates a [start, end) pair consisting of two prefix keys.
230 1 : func (g *generator) prefixKeyRange() ([]byte, []byte) {
231 1 : keys := g.keyGenerator.UniqueKeys(2, func() []byte { return g.keyGenerator.RandPrefix(0.01) })
232 1 : return keys[0], keys[1]
233 : }
234 :
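 : // randKeyToSingleDelete returns a random key that is currently eligible for
 : // a single delete on the given object, or nil if no such key exists.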
235 1 : func (g *generator) randKeyToSingleDelete(id objID) []byte {
236 1 : keys := g.keyManager.eligibleSingleDeleteKeys(id)
237 1 : length := len(keys)
238 1 : if length == 0 {
239 1 : return nil
240 1 : }
241 1 : return keys[g.rng.IntN(length)]
242 : }
243 :
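 : // resizeBuffer returns a buffer of length prefixLen+suffixLen, reusing buf's
 : // storage when it already has sufficient capacity and allocating otherwise.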
244 1 : func resizeBuffer(buf []byte, prefixLen, suffixLen int) []byte {
245 1 : if cap(buf) >= prefixLen+suffixLen {
246 0 : return buf[:prefixLen+suffixLen]
247 0 : }
248 1 : return make([]byte, prefixLen+suffixLen)
249 : }
250 :
251 1 : func (g *generator) newBatch() {
252 1 : batchID := makeObjID(batchTag, g.init.batchSlots)
253 1 : g.init.batchSlots++
254 1 : g.liveBatches = append(g.liveBatches, batchID)
255 1 : g.liveWriters = append(g.liveWriters, batchID)
256 1 : dbID := g.dbs.rand(g.rng)
257 1 : g.objDB[batchID] = dbID
258 1 :
259 1 : g.add(&newBatchOp{
260 1 : dbID: dbID,
261 1 : batchID: batchID,
262 1 : })
263 1 : }
264 :
265 1 : func (g *generator) newIndexedBatch() {
266 1 : batchID := makeObjID(batchTag, g.init.batchSlots)
267 1 : g.init.batchSlots++
268 1 : g.liveBatches = append(g.liveBatches, batchID)
269 1 : g.liveReaders = append(g.liveReaders, batchID)
270 1 : g.liveWriters = append(g.liveWriters, batchID)
271 1 :
272 1 : iters := make(objIDSet)
273 1 : g.batches[batchID] = iters
274 1 : g.readers[batchID] = iters
275 1 : dbID := g.dbs.rand(g.rng)
276 1 : g.objDB[batchID] = dbID
277 1 :
278 1 : g.add(&newIndexedBatchOp{
279 1 : dbID: dbID,
280 1 : batchID: batchID,
281 1 : })
282 1 : }
283 :
284 : // removeBatchFromGenerator removes a batch without generating a closeOp for
285 : // it, as not every batch that is removed from the generator should be
286 : // closed. For example, running a closeOp before an ingestOp that contains
287 : // the closed batch will cause an error.
288 1 : func (g *generator) removeBatchFromGenerator(batchID objID) {
289 1 : g.liveBatches.remove(batchID)
290 1 : iters := g.batches[batchID]
291 1 : delete(g.batches, batchID)
292 1 :
293 1 : if iters != nil {
294 1 : g.liveReaders.remove(batchID)
295 1 : delete(g.readers, batchID)
296 1 : }
297 1 : g.liveWriters.remove(batchID)
298 1 : for _, id := range iters.sorted() {
299 1 : g.liveIters.remove(id)
300 1 : delete(g.iters, id)
301 1 : g.add(&closeOp{objID: id})
302 1 : }
303 : }
304 :
305 1 : func (g *generator) batchAbort() {
306 1 : if len(g.liveBatches) == 0 {
307 1 : return
308 1 : }
309 :
310 1 : batchID := g.liveBatches.rand(g.rng)
311 1 : g.removeBatchFromGenerator(batchID)
312 1 :
313 1 : g.add(&closeOp{objID: batchID})
314 : }
315 :
316 1 : func (g *generator) batchCommit() {
317 1 : if len(g.liveBatches) == 0 {
318 1 : return
319 1 : }
320 :
321 1 : batchID := g.liveBatches.rand(g.rng)
322 1 : dbID := g.dbIDForObj(batchID)
323 1 : g.removeBatchFromGenerator(batchID)
324 1 :
325 1 : // The batch we're applying may contain single delete tombstones that, when
326 1 : // applied to the writer, result in nondeterminism for the deleted key. If
327 1 : // that's the case, we can restore determinism by first deleting the key
328 1 : // from the writer.
329 1 : //
330 1 : // Generating additional operations here is not ideal, but it simplifies
331 1 : // single delete invariants significantly.
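 : //
 : // (Illustrative example: if the DB has two visible SETs of key k and the
 : // batch contains SINGLEDEL(k), the single delete could expose the older
 : // value or remove the key entirely depending on compaction timing; emitting
 : // DEL(k) on the DB first resets k's history and makes the result
 : // deterministic.)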
332 1 : singleDeleteConflicts := g.keyManager.checkForSingleDelConflicts(batchID, dbID, false /* collapsed */)
333 1 : for _, conflict := range singleDeleteConflicts {
334 0 : g.add(&deleteOp{
335 0 : writerID: dbID,
336 0 : key: conflict,
337 0 : derivedDBID: dbID,
338 0 : })
339 0 : }
340 :
341 1 : g.add(&batchCommitOp{
342 1 : dbID: dbID,
343 1 : batchID: batchID,
344 1 : })
345 1 : g.add(&closeOp{objID: batchID})
346 :
347 : }
348 :
349 1 : func (g *generator) dbClose() {
350 1 : // Close any live iterators, snapshots, and batches, so that we can close
351 1 : // the DBs cleanly.
352 1 : for len(g.liveIters) > 0 {
353 1 : g.randIter(g.iterClose)()
354 1 : }
355 1 : for len(g.liveSnapshots) > 0 {
356 1 : g.snapshotClose()
357 1 : }
358 1 : for len(g.liveBatches) > 0 {
359 0 : batchID := g.liveBatches[0]
360 0 : g.removeBatchFromGenerator(batchID)
361 0 : g.add(&closeOp{objID: batchID})
362 0 : }
363 1 : for len(g.dbs) > 0 {
364 1 : db := g.dbs[0]
365 1 : g.dbs = g.dbs[1:]
366 1 : g.add(&closeOp{objID: db})
367 1 : }
368 : }
369 :
370 1 : func (g *generator) dbCheckpoint() {
371 1 : numSpans := g.expRandInt(1)
372 1 : var spans []pebble.CheckpointSpan
373 1 : if numSpans > 0 {
374 1 : spans = make([]pebble.CheckpointSpan, numSpans)
375 1 : }
376 1 : for i := range spans {
377 1 : start := g.keyGenerator.RandKey(0.01)
378 1 : end := g.keyGenerator.RandKey(0.01)
379 1 : if g.cmp(start, end) > 0 {
380 1 : start, end = end, start
381 1 : }
382 1 : spans[i].Start = start
383 1 : spans[i].End = end
384 : }
385 1 : dbID := g.dbs.rand(g.rng)
386 1 : g.add(&checkpointOp{
387 1 : dbID: dbID,
388 1 : spans: spans,
389 1 : })
390 : }
391 :
392 1 : func (g *generator) dbCompact() {
393 1 : // Generate new key(s) with a 1% probability.
394 1 : start := g.keyGenerator.RandKey(0.01)
395 1 : end := g.keyGenerator.RandKey(0.01)
396 1 : if g.cmp(start, end) > 0 {
397 1 : start, end = end, start
398 1 : }
399 1 : dbID := g.dbs.rand(g.rng)
400 1 : g.add(&compactOp{
401 1 : dbID: dbID,
402 1 : start: start,
403 1 : end: end,
404 1 : parallelize: g.rng.Float64() < 0.5,
405 1 : })
406 : }
407 :
408 1 : func (g *generator) dbDownload() {
409 1 : numSpans := 1 + g.expRandInt(1)
410 1 : spans := make([]pebble.DownloadSpan, numSpans)
411 1 : for i := range spans {
412 1 : keys := g.keyGenerator.UniqueKeys(2, func() []byte { return g.keyGenerator.RandKey(0.001) })
413 1 : start, end := keys[0], keys[1]
414 1 : spans[i].StartKey = start
415 1 : spans[i].EndKey = end
416 1 : spans[i].ViaBackingFileDownload = g.rng.IntN(2) == 0
417 : }
418 1 : dbID := g.dbs.rand(g.rng)
419 1 : g.add(&downloadOp{
420 1 : dbID: dbID,
421 1 : spans: spans,
422 1 : })
423 : }
424 :
425 1 : func (g *generator) dbFlush() {
426 1 : g.add(&flushOp{g.dbs.rand(g.rng)})
427 1 : }
428 :
429 1 : func (g *generator) dbRatchetFormatMajorVersion() {
430 1 : // Ratchet to a random format major version between the minimum the
431 1 : // metamorphic tests support and the newest. At runtime, the generated
432 1 : // version may be behind the database's format major version, in which case
433 1 : // RatchetFormatMajorVersion should deterministically error.
434 1 :
435 1 : dbID := g.dbs.rand(g.rng)
436 1 : n := int(newestFormatMajorVersionToTest - minimumFormatMajorVersion)
437 1 : vers := pebble.FormatMajorVersion(g.rng.IntN(n+1)) + minimumFormatMajorVersion
438 1 : g.add(&dbRatchetFormatMajorVersionOp{dbID: dbID, vers: vers})
439 1 : }
440 :
441 1 : func (g *generator) dbRestart() {
442 1 : // Close any live iterators and snapshots, so that we can close the DB
443 1 : // cleanly.
444 1 : dbID := g.dbs.rand(g.rng)
445 1 : for len(g.liveIters) > 0 {
446 1 : g.randIter(g.iterClose)()
447 1 : }
448 1 : for len(g.liveSnapshots) > 0 {
449 1 : g.snapshotClose()
450 1 : }
451 : // Close the batches.
452 1 : for len(g.liveBatches) > 0 {
453 1 : batchID := g.liveBatches[0]
454 1 : g.removeBatchFromGenerator(batchID)
455 1 : g.add(&closeOp{objID: batchID})
456 1 : }
457 1 : if len(g.liveReaders) != len(g.dbs) || len(g.liveWriters) != len(g.dbs) {
458 0 : panic(fmt.Sprintf("unexpected counts: liveReaders %d, liveWriters: %d",
459 0 : len(g.liveReaders), len(g.liveWriters)))
460 : }
461 1 : g.add(&dbRestartOp{dbID: dbID})
462 : }
463 :
464 : // maybeSetSnapshotIterBounds must be called whenever creating a new iterator or
465 : // modifying the bounds of an iterator. If the iterator is backed by a snapshot
466 : // that only guarantees consistency within a limited set of key spans, then the
467 : // iterator must set bounds within one of the snapshot's consistent keyspans. It
468 : // returns true if the provided readerID is a bounded snapshot and bounds were
469 : // set.
470 1 : func (g *generator) maybeSetSnapshotIterBounds(readerID objID, opts *iterOpts) bool {
471 1 : snapBounds, isBoundedSnapshot := g.snapshotBounds[readerID]
472 1 : if !isBoundedSnapshot {
473 1 : return false
474 1 : }
475 : // Pick bounds that fall within one of the snapshot's key ranges.
476 1 : parentBounds := pickOneUniform(g.rng, snapBounds)
477 1 : // With 10% probability, use the parent start bound as-is.
478 1 : if g.rng.Float64() <= 0.1 {
479 1 : opts.lower = parentBounds.Start
480 1 : } else {
481 1 : opts.lower = g.keyGenerator.RandKeyInRange(0.1, parentBounds)
482 1 : }
483 : // With 10% probability, use the parent end bound as-is.
484 1 : if g.rng.Float64() <= 0.1 {
485 1 : opts.upper = parentBounds.End
486 1 : } else {
487 1 : opts.upper = g.keyGenerator.RandKeyInRange(0.1, pebble.KeyRange{
488 1 : Start: opts.lower,
489 1 : End: parentBounds.End,
490 1 : })
491 1 : }
492 1 : return true
493 : }
494 :
495 1 : func (g *generator) newIter() {
496 1 : iterID := makeObjID(iterTag, g.init.iterSlots)
497 1 : g.init.iterSlots++
498 1 : g.liveIters = append(g.liveIters, iterID)
499 1 :
500 1 : readerID := g.liveReaders.rand(g.rng)
501 1 : if iters := g.readers[readerID]; iters != nil {
502 1 : iters[iterID] = struct{}{}
503 1 : g.iters[iterID] = iters
504 1 : //lint:ignore SA9003 - readability
505 1 : } else {
506 1 : // NB: the DB object does not track its open iterators because it never
507 1 : // closes.
508 1 : }
509 1 : g.iterReaderID[iterID] = readerID
510 1 : dbID := g.deriveDB(iterID)
511 1 :
512 1 : var opts iterOpts
513 1 : if !g.maybeSetSnapshotIterBounds(readerID, &opts) {
514 1 : // Generate lower/upper bounds with a 10% probability.
515 1 : if g.rng.Float64() <= 0.1 {
516 1 : // Generate a new key with a .1% probability.
517 1 : opts.lower = g.keyGenerator.RandKey(0.001)
518 1 : }
519 1 : if g.rng.Float64() <= 0.1 {
520 1 : // Generate a new key with a .1% probability.
521 1 : opts.upper = g.keyGenerator.RandKey(0.001)
522 1 : }
523 1 : if g.cmp(opts.lower, opts.upper) > 0 {
524 1 : opts.lower, opts.upper = opts.upper, opts.lower
525 1 : }
526 : }
527 1 : opts.keyTypes, opts.maskSuffix = g.randKeyTypesAndMask()
528 1 :
529 1 : // With 10% probability, enable automatic filtering of keys with suffixes
530 1 : // not in the provided range. This filtering occurs both through
531 1 : // block-property filtering and explicitly within the iterator operations to
532 1 : // ensure determinism.
533 1 : if g.rng.Float64() <= 0.1 {
534 1 : opts.filterMin, opts.filterMax = g.keyGenerator.SuffixRange()
535 1 : }
536 :
537 : // Enable L6 filters with a 10% probability.
538 1 : if g.rng.Float64() <= 0.1 {
539 1 : opts.useL6Filters = true
540 1 : }
541 :
542 1 : g.itersLastOpts[iterID] = opts
543 1 : g.iterVisibleKeys[iterID] = g.keyManager.getSetOfVisibleKeys(readerID)
544 1 : g.iterReaderID[iterID] = readerID
545 1 : g.add(&newIterOp{
546 1 : readerID: readerID,
547 1 : iterID: iterID,
548 1 : iterOpts: opts,
549 1 : derivedDBID: dbID,
550 1 : })
551 : }
552 :
553 1 : func (g *generator) randKeyTypesAndMask() (keyTypes uint32, maskSuffix []byte) {
554 1 : // Iterate over different key types.
555 1 : p := g.rng.Float64()
556 1 : switch {
557 1 : case p < 0.2: // 20% probability
558 1 : keyTypes = uint32(pebble.IterKeyTypePointsOnly)
559 1 : case p < 0.8: // 60% probability
560 1 : keyTypes = uint32(pebble.IterKeyTypePointsAndRanges)
561 1 : // With 50% probability, enable masking.
562 1 : if g.rng.IntN(2) == 1 {
563 1 : maskSuffix = g.keyGenerator.UniformSuffix()
564 1 : }
565 1 : default: // 20% probability
566 1 : keyTypes = uint32(pebble.IterKeyTypeRangesOnly)
567 : }
568 1 : return keyTypes, maskSuffix
569 : }
570 :
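 : // deriveDB determines the DB that the given object (an iterator, batch, or
 : // snapshot) was created on, records the association in g.objDB, and returns
 : // the DB's objID.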
571 1 : func (g *generator) deriveDB(readerID objID) objID {
572 1 : dbParentID := readerID
573 1 : if readerID.tag() == iterTag {
574 1 : dbParentID = g.iterReaderID[readerID]
575 1 : }
576 1 : if dbParentID.tag() != dbTag {
577 1 : dbParentID = g.dbIDForObj(dbParentID)
578 1 : }
579 1 : g.objDB[readerID] = dbParentID
580 1 : return dbParentID
581 : }
582 :
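 : // dbIDForObj returns the DB associated with the given object, panicking if
 : // no association has been recorded.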
583 1 : func (g *generator) dbIDForObj(objID objID) objID {
584 1 : if g.objDB[objID] == 0 {
585 0 : panic(fmt.Sprintf("object %s has no associated DB", objID))
586 : }
587 1 : return g.objDB[objID]
588 : }
589 :
590 1 : func (g *generator) newIterUsingClone() {
591 1 : if len(g.liveIters) == 0 {
592 1 : return
593 1 : }
594 1 : existingIterID := g.liveIters.rand(g.rng)
595 1 : iterID := makeObjID(iterTag, g.init.iterSlots)
596 1 : g.init.iterSlots++
597 1 : g.liveIters = append(g.liveIters, iterID)
598 1 : if iters := g.iters[existingIterID]; iters != nil {
599 1 : iters[iterID] = struct{}{}
600 1 : g.iters[iterID] = iters
601 1 : //lint:ignore SA9003 - readability
602 1 : } else {
603 1 : // NB: the DB object does not track its open iterators because it never
604 1 : // closes.
605 1 : }
606 1 : readerID := g.iterReaderID[existingIterID]
607 1 : g.iterReaderID[iterID] = readerID
608 1 : g.deriveDB(iterID)
609 1 :
610 1 : var refreshBatch bool
611 1 : visibleKeys := g.iterVisibleKeys[existingIterID]
612 1 : if readerID.tag() == batchTag {
613 0 : if refreshBatch = g.rng.IntN(2) == 1; refreshBatch {
614 0 : visibleKeys = g.keyManager.getSetOfVisibleKeys(readerID)
615 0 : }
616 : }
617 :
618 1 : opts := g.itersLastOpts[existingIterID]
619 1 : // With 50% probability, consider modifying the iterator options used by the
620 1 : // clone.
621 1 : if g.rng.IntN(2) == 1 {
622 1 : g.maybeMutateOptions(readerID, &opts)
623 1 : }
624 1 : g.itersLastOpts[iterID] = opts
625 1 :
626 1 : // Copy the visible keys from the existing iterator.
627 1 : g.iterVisibleKeys[iterID] = visibleKeys
628 1 : g.iterReaderID[iterID] = g.iterReaderID[existingIterID]
629 1 : g.add(&newIterUsingCloneOp{
630 1 : existingIterID: existingIterID,
631 1 : iterID: iterID,
632 1 : refreshBatch: refreshBatch,
633 1 : iterOpts: opts,
634 1 : derivedReaderID: readerID,
635 1 : })
636 : }
637 :
638 1 : func (g *generator) iterClose(iterID objID) {
639 1 : g.liveIters.remove(iterID)
640 1 : if readerIters, ok := g.iters[iterID]; ok {
641 1 : delete(g.iters, iterID)
642 1 : delete(readerIters, iterID)
643 1 : }
644 :
645 1 : g.add(&closeOp{objID: iterID})
646 : }
647 :
648 1 : func (g *generator) iterSetBounds(iterID objID) {
649 1 : iterLastOpts := g.itersLastOpts[iterID]
650 1 : newOpts := iterLastOpts
651 1 : // TODO(jackson): The logic to increase the probability of advancing bounds
652 1 : // monotonically only applies if the snapshot is not bounded. Refactor to
653 1 : // allow bounded snapshots to benefit too, when possible.
654 1 : if !g.maybeSetSnapshotIterBounds(g.iterReaderID[iterID], &newOpts) {
655 1 : var lower, upper []byte
656 1 : genLower := g.rng.Float64() <= 0.9
657 1 : genUpper := g.rng.Float64() <= 0.9
658 1 : // When one of ensureLowerGE, ensureUpperLE is true, the new bounds
659 1 : // don't overlap with the previous bounds.
660 1 : var ensureLowerGE, ensureUpperLE bool
661 1 : if genLower && iterLastOpts.upper != nil && g.rng.Float64() <= 0.9 {
662 1 : ensureLowerGE = true
663 1 : }
664 1 : if (!ensureLowerGE || g.rng.Float64() < 0.5) && genUpper && iterLastOpts.lower != nil {
665 1 : ensureUpperLE = true
666 1 : ensureLowerGE = false
667 1 : }
668 1 : attempts := 0
669 1 : for {
670 1 : attempts++
671 1 : if genLower {
672 1 : // Generate a new key with a .1% probability.
673 1 : lower = g.keyGenerator.RandKey(0.001)
674 1 : }
675 1 : if genUpper {
676 1 : // Generate a new key with a .1% probability.
677 1 : upper = g.keyGenerator.RandKey(0.001)
678 1 : }
679 1 : if g.cmp(lower, upper) > 0 {
680 1 : lower, upper = upper, lower
681 1 : }
682 1 : if ensureLowerGE && g.cmp(iterLastOpts.upper, lower) > 0 {
683 1 : if attempts < 25 {
684 1 : continue
685 : }
686 1 : lower = iterLastOpts.upper
687 1 : upper = lower
688 1 : break
689 : }
690 1 : if ensureUpperLE && g.cmp(upper, iterLastOpts.lower) > 0 {
691 1 : if attempts < 25 {
692 1 : continue
693 : }
694 1 : upper = iterLastOpts.lower
695 1 : lower = upper
696 1 : break
697 : }
698 1 : break
699 : }
700 1 : newOpts.lower = lower
701 1 : newOpts.upper = upper
702 : }
703 1 : g.itersLastOpts[iterID] = newOpts
704 1 : g.add(&iterSetBoundsOp{
705 1 : iterID: iterID,
706 1 : lower: newOpts.lower,
707 1 : upper: newOpts.upper,
708 1 : })
709 1 : // Additionally seek the iterator in a manner consistent with the bounds,
710 1 : // and do some steps (Next/Prev). The seeking exercises typical
711 1 : // CockroachDB behavior when using iterators and the steps are trying to
712 1 : // stress the region near the bounds. Ideally, we should not do this as
713 1 : // part of generating a single op, but this is easier than trying to
714 1 : // control future op generation via generator state.
715 1 : doSeekLT := newOpts.upper != nil && g.rng.Float64() < 0.5
716 1 : doSeekGE := newOpts.lower != nil && g.rng.Float64() < 0.5
717 1 : if doSeekLT && doSeekGE {
718 1 : // Pick the seek.
719 1 : if g.rng.Float64() < 0.5 {
720 1 : doSeekGE = false
721 1 : } else {
722 1 : doSeekLT = false
723 1 : }
724 : }
725 1 : if doSeekLT {
726 1 : g.add(&iterSeekLTOp{
727 1 : iterID: iterID,
728 1 : key: newOpts.upper,
729 1 : derivedReaderID: g.iterReaderID[iterID],
730 1 : })
731 1 : if g.rng.Float64() < 0.5 {
732 1 : g.iterNext(iterID)
733 1 : }
734 1 : if g.rng.Float64() < 0.5 {
735 1 : g.iterNext(iterID)
736 1 : }
737 1 : if g.rng.Float64() < 0.5 {
738 1 : g.iterPrev(iterID)
739 1 : }
740 1 : } else if doSeekGE {
741 1 : g.add(&iterSeekGEOp{
742 1 : iterID: iterID,
743 1 : key: newOpts.lower,
744 1 : derivedReaderID: g.iterReaderID[iterID],
745 1 : })
746 1 : if g.rng.Float64() < 0.5 {
747 1 : g.iterPrev(iterID)
748 1 : }
749 1 : if g.rng.Float64() < 0.5 {
750 1 : g.iterPrev(iterID)
751 1 : }
752 1 : if g.rng.Float64() < 0.5 {
753 1 : g.iterNext(iterID)
754 1 : }
755 : }
756 : }
757 :
758 1 : func (g *generator) iterSetOptions(iterID objID) {
759 1 : opts := g.itersLastOpts[iterID]
760 1 : g.maybeMutateOptions(g.iterReaderID[iterID], &opts)
761 1 : g.itersLastOpts[iterID] = opts
762 1 : g.add(&iterSetOptionsOp{
763 1 : iterID: iterID,
764 1 : iterOpts: opts,
765 1 : derivedReaderID: g.iterReaderID[iterID],
766 1 : })
767 1 :
768 1 : // Additionally, perform a random absolute positioning operation. The
769 1 : // SetOptions contract requires one before the next relative positioning
770 1 : // operation. Ideally, we should not do this as part of generating a single
771 1 : // op, but this is easier than trying to control future op generation via
772 1 : // generator state.
773 1 : pickOneUniform(
774 1 : g.rng,
775 1 : []func(objID){
776 1 : g.iterFirst,
777 1 : g.iterLast,
778 1 : g.iterSeekGE,
779 1 : g.iterSeekGEWithLimit,
780 1 : g.iterSeekPrefixGE,
781 1 : g.iterSeekLT,
782 1 : g.iterSeekLTWithLimit,
783 1 : },
784 1 : )(iterID)
785 1 : }
786 :
787 1 : func (g *generator) iterSeekGE(iterID objID) {
788 1 : g.add(&iterSeekGEOp{
789 1 : iterID: iterID,
790 1 : key: g.keyGenerator.RandKey(0.001), // 0.1% new keys
791 1 : derivedReaderID: g.iterReaderID[iterID],
792 1 : })
793 1 : }
794 :
795 1 : func (g *generator) iterSeekGEWithLimit(iterID objID) {
796 1 : // 0.1% new keys
797 1 : key, limit := g.keyGenerator.RandKey(0.001), g.keyGenerator.RandKey(0.001)
798 1 : if g.cmp(key, limit) > 0 {
799 1 : key, limit = limit, key
800 1 : }
801 1 : g.add(&iterSeekGEOp{
802 1 : iterID: iterID,
803 1 : key: key,
804 1 : limit: limit,
805 1 : derivedReaderID: g.iterReaderID[iterID],
806 1 : })
807 : }
808 :
809 1 : func (g *generator) iterSeekPrefixGE(iterID objID) {
810 1 : // Purely random key selection is unlikely to pick a key with any visible
811 1 : // versions, especially if we don't take iterator bounds into account. We
812 1 : // try to err towards picking a key within bounds that contains a value
813 1 : // visible to the iterator.
814 1 : lower := g.itersLastOpts[iterID].lower
815 1 : upper := g.itersLastOpts[iterID].upper
816 1 : var key []byte
817 1 : if g.rng.IntN(5) >= 1 {
818 1 : visibleKeys := g.iterVisibleKeys[iterID]
819 1 : if lower != nil {
820 1 : i, _ := slices.BinarySearchFunc(visibleKeys, lower, g.cmp)
821 1 : visibleKeys = visibleKeys[i:]
822 1 : }
823 1 : if upper != nil {
824 1 : i, _ := slices.BinarySearchFunc(visibleKeys, upper, g.cmp)
825 1 : visibleKeys = visibleKeys[:i]
826 1 : }
827 1 : if len(visibleKeys) > 0 {
828 1 : key = visibleKeys[g.rng.IntN(len(visibleKeys))]
829 1 : }
830 : }
831 1 : if key == nil {
832 1 : key = g.keyGenerator.RandKey(0) // 0% new keys
833 1 : }
834 : // Sometimes limit the key to just the prefix.
835 1 : if g.rng.IntN(3) == 1 {
836 1 : key = g.keyManager.kf.Comparer.Split.Prefix(key)
837 1 : }
838 1 : g.add(&iterSeekPrefixGEOp{
839 1 : iterID: iterID,
840 1 : key: key,
841 1 : derivedReaderID: g.iterReaderID[iterID],
842 1 : })
843 : }
844 :
845 1 : func (g *generator) iterSeekLT(iterID objID) {
846 1 : g.add(&iterSeekLTOp{
847 1 : iterID: iterID,
848 1 : key: g.keyGenerator.RandKey(0.001), // 0.1% new keys
849 1 : derivedReaderID: g.iterReaderID[iterID],
850 1 : })
851 1 : }
852 :
853 1 : func (g *generator) iterSeekLTWithLimit(iterID objID) {
854 1 : // 0.1% new keys
855 1 : key, limit := g.keyGenerator.RandKey(0.001), g.keyGenerator.RandKey(0.001)
856 1 : if g.cmp(limit, key) > 0 {
857 1 : key, limit = limit, key
858 1 : }
859 1 : g.add(&iterSeekLTOp{
860 1 : iterID: iterID,
861 1 : key: key,
862 1 : limit: limit,
863 1 : derivedReaderID: g.iterReaderID[iterID],
864 1 : })
865 : }
866 :
867 : // randIter performs partial func application ("currying"), returning a new
868 : // function that supplies the given func with a random iterator.
869 1 : func (g *generator) randIter(gen func(objID)) func() {
870 1 : return func() {
871 1 : if len(g.liveIters) == 0 {
872 1 : return
873 1 : }
874 1 : gen(g.liveIters.rand(g.rng))
875 : }
876 : }
877 :
878 1 : func (g *generator) iterFirst(iterID objID) {
879 1 : g.add(&iterFirstOp{
880 1 : iterID: iterID,
881 1 : derivedReaderID: g.iterReaderID[iterID],
882 1 : })
883 1 : }
884 :
885 1 : func (g *generator) iterLast(iterID objID) {
886 1 : g.add(&iterLastOp{
887 1 : iterID: iterID,
888 1 : derivedReaderID: g.iterReaderID[iterID],
889 1 : })
890 1 : }
891 :
892 1 : func (g *generator) iterNext(iterID objID) {
893 1 : g.add(&iterNextOp{
894 1 : iterID: iterID,
895 1 : derivedReaderID: g.iterReaderID[iterID],
896 1 : })
897 1 : }
898 :
899 1 : func (g *generator) iterPrev(iterID objID) {
900 1 : g.add(&iterPrevOp{
901 1 : iterID: iterID,
902 1 : derivedReaderID: g.iterReaderID[iterID],
903 1 : })
904 1 : }
905 :
906 1 : func (g *generator) iterNextWithLimit(iterID objID) {
907 1 : g.add(&iterNextOp{
908 1 : iterID: iterID,
909 1 : limit: g.keyGenerator.RandKey(0.001), // 0.1% new keys
910 1 : derivedReaderID: g.iterReaderID[iterID],
911 1 : })
912 1 : }
913 :
914 1 : func (g *generator) iterNextPrefix(iterID objID) {
915 1 : g.add(&iterNextPrefixOp{
916 1 : iterID: iterID,
917 1 : derivedReaderID: g.iterReaderID[iterID],
918 1 : })
919 1 : }
920 :
921 1 : func (g *generator) iterCanSingleDelete(iterID objID) {
922 1 : g.add(&iterCanSingleDelOp{
923 1 : iterID: iterID,
924 1 : derivedReaderID: g.iterReaderID[iterID],
925 1 : })
926 1 : }
927 :
928 1 : func (g *generator) iterPrevWithLimit(iterID objID) {
929 1 : g.add(&iterPrevOp{
930 1 : iterID: iterID,
931 1 : limit: g.keyGenerator.RandKey(0.001), // 0.1% new keys
932 1 : derivedReaderID: g.iterReaderID[iterID],
933 1 : })
934 1 : }
935 :
936 1 : func (g *generator) readerGet() {
937 1 : if len(g.liveReaders) == 0 {
938 0 : return
939 0 : }
940 :
941 1 : readerID := g.liveReaders.rand(g.rng)
942 1 :
943 1 : // If the chosen reader is a snapshot created with user-specified key
944 1 : // ranges, restrict the read to fall within one of the provided key ranges.
945 1 : var key []byte
946 1 : if bounds := g.snapshotBounds[readerID]; len(bounds) > 0 {
947 1 : kr := bounds[g.rng.IntN(len(bounds))]
948 1 : key = g.keyGenerator.RandKeyInRange(0.001, kr) // 0.1% new keys
949 1 : } else {
950 1 : key = g.keyGenerator.RandKey(0.001) // 0.1% new keys
951 1 : }
952 1 : derivedDBID := objID(0)
953 1 : if readerID.tag() == batchTag || readerID.tag() == snapTag {
954 1 : derivedDBID = g.deriveDB(readerID)
955 1 : }
956 1 : g.add(&getOp{readerID: readerID, key: key, derivedDBID: derivedDBID})
957 : }
958 :
959 1 : func (g *generator) replicate() {
960 1 : if len(g.dbs) < 2 {
961 0 : return
962 0 : }
963 :
964 1 : source := g.dbs.rand(g.rng)
965 1 : dest := source
966 1 : for dest == source {
967 1 : dest = g.dbs.rand(g.rng)
968 1 : }
969 :
970 1 : startKey, endKey := g.prefixKeyRange()
971 1 : g.add(&replicateOp{
972 1 : source: source,
973 1 : dest: dest,
974 1 : start: startKey,
975 1 : end: endKey,
976 1 : })
977 : }
978 :
979 : // generateDisjointKeyRanges generates n disjoint key ranges.
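 : // The disjointness assumes UniqueKeys returns its keys in ascending order,
 : // so that consecutive key pairs form non-overlapping [Start, End) ranges.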
980 1 : func (g *generator) generateDisjointKeyRanges(n int) []pebble.KeyRange {
981 1 : keys := g.keyGenerator.UniqueKeys(2*n, func() []byte { return g.keyGenerator.RandPrefix(0.1) })
982 1 : keyRanges := make([]pebble.KeyRange, n)
983 1 : for i := range keyRanges {
984 1 : keyRanges[i] = pebble.KeyRange{
985 1 : Start: keys[i*2],
986 1 : End: keys[i*2+1],
987 1 : }
988 1 : }
989 1 : return keyRanges
990 : }
991 :
992 1 : func (g *generator) newSnapshot() {
993 1 : snapID := makeObjID(snapTag, g.init.snapshotSlots)
994 1 : g.init.snapshotSlots++
995 1 : g.liveSnapshots = append(g.liveSnapshots, snapID)
996 1 : g.liveReaders = append(g.liveReaders, snapID)
997 1 : dbID := g.dbs.rand(g.rng)
998 1 : g.objDB[snapID] = dbID
999 1 :
1000 1 : iters := make(objIDSet)
1001 1 : g.snapshots[snapID] = iters
1002 1 : g.readers[snapID] = iters
1003 1 :
1004 1 : s := &newSnapshotOp{
1005 1 : dbID: dbID,
1006 1 : snapID: snapID,
1007 1 : }
1008 1 :
1009 1 : // Impose bounds on the keys that may be read with the snapshot. Setting bounds
1010 1 : // allows some runs of the metamorphic test to use an EventuallyFileOnlySnapshot
1011 1 : // instead of a Snapshot, testing equivalence between the two for reads within
1012 1 : // those bounds.
1013 1 : s.bounds = g.generateDisjointKeyRanges(
1014 1 : 1 + g.expRandInt(3),
1015 1 : )
1016 1 : g.snapshotBounds[snapID] = s.bounds
1017 1 : g.add(s)
1018 1 : }
1019 :
1020 1 : func (g *generator) snapshotClose() {
1021 1 : if len(g.liveSnapshots) == 0 {
1022 1 : return
1023 1 : }
1024 :
1025 1 : snapID := g.liveSnapshots.rand(g.rng)
1026 1 : g.liveSnapshots.remove(snapID)
1027 1 : iters := g.snapshots[snapID]
1028 1 : delete(g.snapshots, snapID)
1029 1 : g.liveReaders.remove(snapID)
1030 1 : delete(g.readers, snapID)
1031 1 :
1032 1 : for _, id := range iters.sorted() {
1033 1 : g.liveIters.remove(id)
1034 1 : delete(g.iters, id)
1035 1 : g.add(&closeOp{objID: id})
1036 1 : }
1037 :
1038 1 : g.add(&closeOp{objID: snapID})
1039 : }
1040 :
1041 1 : func (g *generator) newExternalObj() {
1042 1 : if len(g.liveBatches) == 0 {
1043 1 : return
1044 1 : }
1045 1 : var batchID objID
1046 1 : // Try to find a suitable batch.
1047 1 : for i := 0; ; i++ {
1048 1 : if i == 10 {
1049 1 : return
1050 1 : }
1051 1 : batchID = g.liveBatches.rand(g.rng)
1052 1 : okm := g.keyManager.objKeyMeta(batchID)
1053 1 : if !okm.bounds.IsUnset() {
1054 1 : break
1055 : }
1056 : }
1057 1 : g.removeBatchFromGenerator(batchID)
1058 1 : objID := makeObjID(externalObjTag, g.init.externalObjSlots)
1059 1 : g.init.externalObjSlots++
1060 1 : g.externalObjects = append(g.externalObjects, objID)
1061 1 : g.add(&newExternalObjOp{
1062 1 : batchID: batchID,
1063 1 : externalObjID: objID,
1064 1 : })
1065 : }
1066 :
1067 1 : func (g *generator) writerApply() {
1068 1 : if len(g.liveBatches) == 0 {
1069 1 : return
1070 1 : }
1071 1 : if len(g.liveWriters) < 2 {
1072 0 : panic(fmt.Sprintf("insufficient liveWriters (%d) to apply batch", len(g.liveWriters)))
1073 : }
1074 :
1075 1 : batchID := g.liveBatches.rand(g.rng)
1076 1 : dbID := g.dbIDForObj(batchID)
1077 1 :
1078 1 : var writerID objID
1079 1 : for {
1080 1 : // NB: The writer we're applying to, as well as the batch we're applying,
1081 1 : // must be from the same DB. The writer could be the db itself. Applying
1082 1 : // a batch from one DB on another DB results in a panic, so avoid that.
1083 1 : writerID = g.liveWriters.rand(g.rng)
1084 1 : writerDBID := writerID
1085 1 : if writerID.tag() != dbTag {
1086 1 : writerDBID = g.dbIDForObj(writerID)
1087 1 : }
1088 1 : if writerID != batchID && writerDBID == dbID {
1089 1 : break
1090 : }
1091 : }
1092 :
1093 : // The batch we're applying may contain single delete tombstones that, when
1094 : // applied to the writer, result in nondeterminism for the deleted key. If
1095 : // that's the case, we can restore determinism by first deleting the key
1096 : // from the writer.
1097 : //
1098 : // Generating additional operations here is not ideal, but it simplifies
1099 : // single delete invariants significantly.
1100 1 : singleDeleteConflicts := g.keyManager.checkForSingleDelConflicts(batchID, writerID, false /* collapsed */)
1101 1 : for _, conflict := range singleDeleteConflicts {
1102 0 : g.add(&deleteOp{
1103 0 : writerID: writerID,
1104 0 : key: conflict,
1105 0 : derivedDBID: dbID,
1106 0 : })
1107 0 : }
1108 :
1109 1 : g.removeBatchFromGenerator(batchID)
1110 1 :
1111 1 : g.add(&applyOp{
1112 1 : writerID: writerID,
1113 1 : batchID: batchID,
1114 1 : })
1115 1 : g.add(&closeOp{
1116 1 : objID: batchID,
1117 1 : })
1118 : }
1119 :
1120 1 : func (g *generator) writerDelete() {
1121 1 : if len(g.liveWriters) == 0 {
1122 0 : return
1123 0 : }
1124 :
1125 1 : writerID := g.liveWriters.rand(g.rng)
1126 1 : derivedDBID := writerID
1127 1 : if derivedDBID.tag() != dbTag {
1128 1 : derivedDBID = g.dbIDForObj(writerID)
1129 1 : }
1130 1 : g.add(&deleteOp{
1131 1 : writerID: writerID,
1132 1 : key: g.keyGenerator.RandKey(0.001), // 0.1% new keys
1133 1 : derivedDBID: derivedDBID,
1134 1 : })
1135 : }
1136 :
1137 1 : func (g *generator) writerDeleteRange() {
1138 1 : if len(g.liveWriters) == 0 {
1139 0 : return
1140 0 : }
1141 :
1142 1 : keys := g.keyGenerator.UniqueKeys(2, func() []byte { return g.keyGenerator.RandKey(0.001) })
1143 1 : start, end := keys[0], keys[1]
1144 1 :
1145 1 : writerID := g.liveWriters.rand(g.rng)
1146 1 : g.add(&deleteRangeOp{
1147 1 : writerID: writerID,
1148 1 : start: start,
1149 1 : end: end,
1150 1 : })
1151 : }
1152 :
1153 1 : func (g *generator) writerRangeKeyDelete() {
1154 1 : if len(g.liveWriters) == 0 {
1155 0 : return
1156 0 : }
1157 1 : start, end := g.prefixKeyRange()
1158 1 :
1159 1 : writerID := g.liveWriters.rand(g.rng)
1160 1 : g.add(&rangeKeyDeleteOp{
1161 1 : writerID: writerID,
1162 1 : start: start,
1163 1 : end: end,
1164 1 : })
1165 : }
1166 :
1167 1 : func (g *generator) writerRangeKeySet() {
1168 1 : if len(g.liveWriters) == 0 {
1169 0 : return
1170 0 : }
1171 1 : start, end := g.prefixKeyRange()
1172 1 :
1173 1 : // 90% of the time, set a suffix.
1174 1 : var suffix []byte
1175 1 : if g.rng.Float64() < 0.90 {
1176 1 : // Increase the max suffix 5% of the time.
1177 1 : suffix = g.keyGenerator.SkewedSuffix(0.05)
1178 1 : }
1179 :
1180 1 : writerID := g.liveWriters.rand(g.rng)
1181 1 : g.add(&rangeKeySetOp{
1182 1 : writerID: writerID,
1183 1 : start: start,
1184 1 : end: end,
1185 1 : suffix: suffix,
1186 1 : value: randBytes(g.rng, 0, maxValueSize),
1187 1 : })
1188 : }
1189 :
1190 1 : func (g *generator) writerRangeKeyUnset() {
1191 1 : if len(g.liveWriters) == 0 {
1192 0 : return
1193 0 : }
1194 1 : start, end := g.prefixKeyRange()
1195 1 :
1196 1 : // 90% of the time, set a suffix.
1197 1 : var suffix []byte
1198 1 : if g.rng.Float64() < 0.90 {
1199 1 : // Increase the max suffix 5% of the time.
1200 1 : suffix = g.keyGenerator.SkewedSuffix(0.05)
1201 1 : }
1202 :
1203 : // TODO(jackson): Increase probability of effective unsets? Purely random
1204 : // unsets are unlikely to remove an active range key.
1205 :
1206 1 : writerID := g.liveWriters.rand(g.rng)
1207 1 : g.add(&rangeKeyUnsetOp{
1208 1 : writerID: writerID,
1209 1 : start: start,
1210 1 : end: end,
1211 1 : suffix: suffix,
1212 1 : })
1213 : }
1214 :
1215 1 : func (g *generator) writerIngest() {
1216 1 : if len(g.liveBatches) == 0 {
1217 1 : return
1218 1 : }
1219 :
1220 1 : dbID := g.dbs.rand(g.rng)
1221 1 : n := min(1+g.expRandInt(1), len(g.liveBatches))
1222 1 : batchIDs := make([]objID, n)
1223 1 : derivedDBIDs := make([]objID, n)
1224 1 : for i := 0; i < n; i++ {
1225 1 : batchID := g.liveBatches.rand(g.rng)
1226 1 : batchIDs[i] = batchID
1227 1 : derivedDBIDs[i] = g.dbIDForObj(batchID)
1228 1 : g.removeBatchFromGenerator(batchID)
1229 1 : }
1230 :
1231 : // Ingestions may fail if the ingested sstables overlap one another.
1232 : // Either the ingestion succeeds and its keys are committed to the DB, or
1233 : // it fails and the keys are not committed.
1234 1 : if !g.keyManager.doObjectBoundsOverlap(batchIDs) {
1235 1 : // This ingestion will succeed.
1236 1 : //
1237 1 : // The batches we're ingesting may contain single delete tombstones that,
1238 1 : // when applied to the writer, result in nondeterminism for the deleted key.
1239 1 : // If that's the case, we can restore determinism by first deleting the keys
1240 1 : // from the writer.
1241 1 : //
1242 1 : // Generating additional operations here is not ideal, but it simplifies
1243 1 : // single delete invariants significantly.
1244 1 : for _, batchID := range batchIDs {
1245 1 : singleDeleteConflicts := g.keyManager.checkForSingleDelConflicts(batchID, dbID, true /* collapsed */)
1246 1 : for _, conflict := range singleDeleteConflicts {
1247 1 : g.add(&deleteOp{
1248 1 : writerID: dbID,
1249 1 : key: conflict,
1250 1 : derivedDBID: dbID,
1251 1 : })
1252 1 : }
1253 : }
1254 : }
1255 1 : g.add(&ingestOp{
1256 1 : dbID: dbID,
1257 1 : batchIDs: batchIDs,
1258 1 : derivedDBIDs: derivedDBIDs,
1259 1 : })
1260 : }
1261 :
1262 1 : func (g *generator) writerIngestAndExcise() {
1263 1 : if len(g.liveBatches) == 0 {
1264 1 : return
1265 1 : }
1266 :
1267 1 : dbID := g.dbs.rand(g.rng)
1268 1 : batchID := g.liveBatches.rand(g.rng)
1269 1 : g.removeBatchFromGenerator(batchID)
1270 1 :
1271 1 : start, end := g.prefixKeyRange()
1272 1 : derivedDBID := g.dbIDForObj(batchID)
1273 1 :
1274 1 : // Check for any single delete conflicts. If this batch is single-deleting
1275 1 : // a key that isn't safe to single delete in the underlying db, _and_ this
1276 1 : // key is not in the excise span, we add a delete before the ingestAndExcise.
1277 1 : singleDeleteConflicts := g.keyManager.checkForSingleDelConflicts(batchID, dbID, true /* collapsed */)
1278 1 : for _, conflict := range singleDeleteConflicts {
1279 1 : if g.cmp(conflict, start) >= 0 && g.cmp(conflict, end) < 0 {
1280 0 : // This key will get excised anyway.
1281 0 : continue
1282 : }
1283 1 : g.add(&deleteOp{
1284 1 : writerID: dbID,
1285 1 : key: conflict,
1286 1 : derivedDBID: dbID,
1287 1 : })
1288 : }
1289 :
1290 1 : g.add(&ingestAndExciseOp{
1291 1 : dbID: dbID,
1292 1 : batchID: batchID,
1293 1 : derivedDBID: derivedDBID,
1294 1 : exciseStart: start,
1295 1 : exciseEnd: end,
1296 1 : })
1297 : }
1298 :
1299 1 : func (g *generator) writerIngestExternalFiles() {
1300 1 : if len(g.externalObjects) == 0 {
1301 1 : return
1302 1 : }
1303 1 : dbID := g.dbs.rand(g.rng)
1304 1 : numFiles := 1 + g.expRandInt(1)
1305 1 : objs := make([]externalObjWithBounds, numFiles)
1306 1 :
1307 1 : // We generate the parameters in multiple passes:
1308 1 : // 1. Generate objs with random start and end keys. Their bounds can overlap.
1309 1 : // 2. Sort objects by the start bound and trim the bounds to remove overlap.
1310 1 : // 3. Remove any objects where the previous step resulted in empty bounds.
1311 1 : // 4. Randomly add synthetic suffixes.
1312 1 :
1313 1 : for i := range objs {
1314 1 : // We allow the same object to be selected multiple times.
1315 1 : id := g.externalObjects.rand(g.rng)
1316 1 : b := g.keyManager.objKeyMeta(id).bounds
1317 1 :
1318 1 : objStart := g.prefix(b.smallest)
1319 1 : objEnd := g.prefix(b.largest)
1320 1 : if !b.largestExcl || len(objEnd) != len(b.largest) {
1321 1 : // Move up the end key a bit by appending a few letters to the prefix.
1322 1 : objEnd = append(objEnd, randBytes(g.rng, 1, 3)...)
1323 1 : }
1324 1 : if g.cmp(objStart, objEnd) >= 0 {
1325 0 : panic("bug in generating obj bounds")
1326 : }
1327 : // Generate two random keys within the given bounds.
1328 : // First, generate a start key in the range [objStart, objEnd).
1329 1 : start := g.keyGenerator.RandKeyInRange(0.01, pebble.KeyRange{
1330 1 : Start: objStart,
1331 1 : End: objEnd,
1332 1 : })
1333 1 : start = g.prefix(start)
1334 1 : // Second, generate an end key in the range (start, objEnd]. To do this, we
1335 1 : // generate a key in the range [start, objEnd) and if we get `start`, we
1336 1 : // remap that to `objEnd`.
1337 1 : end := g.keyGenerator.RandKeyInRange(0.01, pebble.KeyRange{
1338 1 : Start: start,
1339 1 : End: objEnd,
1340 1 : })
1341 1 : end = g.prefix(end)
1342 1 : if g.cmp(start, end) == 0 {
1343 1 : end = objEnd
1344 1 : }
1345 : // Randomly set up synthetic prefix.
1346 1 : var syntheticPrefix sstable.SyntheticPrefix
1347 1 : if g.rng.IntN(2) == 0 {
1348 1 : syntheticPrefix = randBytes(g.rng, 1, 5)
1349 1 : start = syntheticPrefix.Apply(start)
1350 1 : end = syntheticPrefix.Apply(end)
1351 1 : }
1352 :
1353 1 : objs[i] = externalObjWithBounds{
1354 1 : externalObjID: id,
1355 1 : bounds: pebble.KeyRange{
1356 1 : Start: start,
1357 1 : End: end,
1358 1 : },
1359 1 : syntheticPrefix: syntheticPrefix,
1360 1 : }
1361 : }
1362 :
1363 : // Sort by start bound.
1364 1 : slices.SortFunc(objs, func(a, b externalObjWithBounds) int {
1365 1 : return g.cmp(a.bounds.Start, b.bounds.Start)
1366 1 : })
1367 :
1368 : // Trim bounds so that there is no overlap.
1369 1 : for i := 0; i < len(objs)-1; i++ {
1370 1 : if g.cmp(objs[i].bounds.End, objs[i+1].bounds.Start) > 0 {
1371 1 : objs[i].bounds.End = objs[i+1].bounds.Start
1372 1 : }
1373 : }
1374 : // Some bounds might be empty now, remove those objects altogether. Note that
1375 : // the last object is unmodified, so at least that object will remain.
1376 1 : objs = slices.DeleteFunc(objs, func(o externalObjWithBounds) bool {
1377 1 : return g.cmp(o.bounds.Start, o.bounds.End) >= 0
1378 1 : })
1379 :
1380 : // Randomly set synthetic suffixes.
1381 1 : for i := range objs {
1382 1 : if g.rng.IntN(2) == 0 {
1383 1 : // We can only use a synthetic suffix if we don't have range dels or RangeKeyUnsets.
1384 1 : if meta := g.keyManager.objKeyMeta(objs[i].externalObjID); meta.hasRangeDels || meta.hasRangeKeyUnset {
1385 1 : continue
1386 : }
1387 :
1388 : // We can only use a synthetic suffix if we don't have overlapping range
1389 : // key sets (because they will become logically conflicting when we
1390 : // replace their suffixes with the synthetic one).
1391 1 : if g.keyManager.ExternalObjectHasOverlappingRangeKeySets(objs[i].externalObjID) {
1392 0 : continue
1393 : }
1394 :
1395 : // Generate a suffix that sorts before any previously generated suffix.
1396 1 : objs[i].syntheticSuffix = g.keyGenerator.IncMaxSuffix()
1397 : }
1398 : }
1399 :
1400 : // The batches we're ingesting may contain single delete tombstones that,
1401 : // when applied to the db, result in nondeterminism for the deleted key. If that's
1402 : // the case, we can restore determinism by first deleting the keys from the
1403 : // db.
1404 : //
1405 : // Generating additional operations here is not ideal, but it simplifies
1406 : // single delete invariants significantly.
1407 1 : dbKeys := g.keyManager.objKeyMeta(dbID)
1408 1 : for _, o := range objs {
1409 1 : for _, src := range g.keyManager.KeysForExternalIngest(o) {
1410 1 : if g.keyManager.checkForSingleDelConflict(src, dbKeys) {
1411 0 : g.add(&deleteOp{
1412 0 : writerID: dbID,
1413 0 : key: src.key,
1414 0 : derivedDBID: dbID,
1415 0 : })
1416 0 : }
1417 : }
1418 : }
1419 :
1420 : // Shuffle the objects.
1421 1 : g.rng.Shuffle(len(objs), func(i, j int) {
1422 1 : objs[i], objs[j] = objs[j], objs[i]
1423 1 : })
1424 :
1425 1 : g.add(&ingestExternalFilesOp{
1426 1 : dbID: dbID,
1427 1 : objs: objs,
1428 1 : })
1429 : }
1430 :
1431 1 : func (g *generator) writerLogData() {
1432 1 : if len(g.liveWriters) == 0 {
1433 0 : return
1434 0 : }
1435 1 : g.add(&logDataOp{
1436 1 : writerID: g.liveWriters.rand(g.rng),
1437 1 : data: randBytes(g.rng, 0, g.expRandInt(10)),
1438 1 : })
1439 : }
1440 :
1441 1 : func (g *generator) writerMerge() {
1442 1 : if len(g.liveWriters) == 0 {
1443 0 : return
1444 0 : }
1445 :
1446 1 : writerID := g.liveWriters.rand(g.rng)
1447 1 : g.add(&mergeOp{
1448 1 : writerID: writerID,
1449 1 : // 20% new keys.
1450 1 : key: g.keyGenerator.RandKey(0.2),
1451 1 : value: randBytes(g.rng, 0, maxValueSize),
1452 1 : })
1453 : }
1454 :
1455 1 : func (g *generator) writerSet() {
1456 1 : if len(g.liveWriters) == 0 {
1457 0 : return
1458 0 : }
1459 :
1460 1 : writerID := g.liveWriters.rand(g.rng)
1461 1 : g.add(&setOp{
1462 1 : writerID: writerID,
1463 1 : // 50% new keys.
1464 1 : key: g.keyGenerator.RandKey(0.5),
1465 1 : value: randBytes(g.rng, 0, maxValueSize),
1466 1 : })
1467 : }
1468 :
1469 1 : func (g *generator) writerSingleDelete() {
1470 1 : if len(g.liveWriters) == 0 {
1471 0 : return
1472 0 : }
1473 :
1474 1 : writerID := g.liveWriters.rand(g.rng)
1475 1 : key := g.randKeyToSingleDelete(writerID)
1476 1 : if key == nil {
1477 1 : return
1478 1 : }
1479 1 : g.add(&singleDeleteOp{
1480 1 : writerID: writerID,
1481 1 : key: key,
1482 1 : // Keys eligible for single deletes can be removed with a regular
1483 1 : // delete. Mutate a percentage of SINGLEDEL ops into DELETEs. Note that
1484 1 : // here we are only determining whether the replacement *could* happen.
1485 1 : // At test runtime, the `replaceSingleDelete` test option must also be
1486 1 : // set to true for the single delete to be replaced.
1487 1 : maybeReplaceDelete: g.rng.Float64() < 0.25,
1488 1 : })
1489 : }
1490 :
1491 1 : func (g *generator) maybeMutateOptions(readerID objID, opts *iterOpts) {
1492 1 : // With 95% probability, allow changes to any options at all. This ensures
1493 1 : // that in 5% of cases there are no changes, and SetOptions hits its fast
1494 1 : // path.
1495 1 : if g.rng.IntN(100) >= 5 {
1496 1 : if !g.maybeSetSnapshotIterBounds(readerID, opts) {
1497 1 : // With 1/3 probability, clear existing bounds.
1498 1 : if opts.lower != nil && g.rng.IntN(3) == 0 {
1499 1 : opts.lower = nil
1500 1 : }
1501 1 : if opts.upper != nil && g.rng.IntN(3) == 0 {
1502 1 : opts.upper = nil
1503 1 : }
1504 : // With 1/3 probability, update the bounds.
1505 1 : if g.rng.IntN(3) == 0 {
1506 1 : // Generate a new key with a .1% probability.
1507 1 : opts.lower = g.keyGenerator.RandKey(0.001)
1508 1 : }
1509 1 : if g.rng.IntN(3) == 0 {
1510 1 : // Generate a new key with a .1% probability.
1511 1 : opts.upper = g.keyGenerator.RandKey(0.001)
1512 1 : }
1513 1 : if g.cmp(opts.lower, opts.upper) > 0 {
1514 1 : opts.lower, opts.upper = opts.upper, opts.lower
1515 1 : }
1516 : }
1517 :
1518 : // With 1/3 probability, update the key-types/mask.
1519 1 : if g.rng.IntN(3) == 0 {
1520 1 : opts.keyTypes, opts.maskSuffix = g.randKeyTypesAndMask()
1521 1 : }
1522 :
1523 : // With 1/3 probability, clear existing filter.
1524 1 : if opts.filterMax != nil && g.rng.IntN(3) == 0 {
1525 1 : opts.filterMax, opts.filterMin = nil, nil
1526 1 : }
1527 : // With 10% probability, set a filter range.
1528 1 : if g.rng.IntN(10) == 1 {
1529 1 : opts.filterMin, opts.filterMax = g.keyGenerator.SuffixRange()
1530 1 : }
1531 : // With 10% probability, flip enablement of L6 filters.
1532 1 : if g.rng.Float64() <= 0.1 {
1533 1 : opts.useL6Filters = !opts.useL6Filters
1534 1 : }
1535 : }
1536 : }
1537 :
1538 1 : func (g *generator) cmp(a, b []byte) int {
1539 1 : return g.keyManager.kf.Comparer.Compare(a, b)
1540 1 : }
1541 :
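 : // prefix returns the prefix portion of key a. The three-index slice clamps
 : // the returned slice's capacity so that appending to the result cannot
 : // overwrite a's suffix bytes.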
1542 1 : func (g *generator) prefix(a []byte) []byte {
1543 1 : n := g.keyManager.kf.Comparer.Split(a)
1544 1 : return a[:n:n]
1545 1 : }
1546 :
1547 0 : func (g *generator) String() string {
1548 0 : var buf bytes.Buffer
1549 0 : for _, op := range g.ops {
1550 0 : fmt.Fprintf(&buf, "%s\n", op)
1551 0 : }
1552 0 : return buf.String()
1553 : }
1554 :
1555 : // expRandInt returns a random non-negative integer using the exponential
1556 : // distribution with the given mean. This is useful when we usually want to test
1557 : // with small values, but we want to occasionally test with a larger value.
1558 : //
1559 : // Large integers are exponentially less likely than small integers;
1560 : // specifically, the probability decreases by a factor of `e` every `mean`
1561 : // values.
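 : //
 : // For example, with mean = 3 the most likely results are 0, 1, and 2;
 : // P(result >= 3) is roughly 1/e ≈ 37%, and P(result >= 6) roughly
 : // 1/e^2 ≈ 14% (ignoring rounding).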
1562 1 : func (g *generator) expRandInt(mean int) int {
1563 1 : return int(math.Round(g.rng.ExpFloat64() * float64(mean)))
1564 1 : }
|