// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package metamorphic

import (
    "bytes"
    "fmt"
    "math"
    "os"
    "slices"

    "github.com/cockroachdb/pebble"
    "github.com/cockroachdb/pebble/internal/randvar"
    "github.com/cockroachdb/pebble/sstable"
    "golang.org/x/exp/rand"
)

const maxValueSize = 20

type iterOpts struct {
    lower    []byte
    upper    []byte
    keyTypes uint32 // pebble.IterKeyType
    // maskSuffix may be set if keyTypes is IterKeyTypePointsAndRanges to
    // configure IterOptions.RangeKeyMasking.Suffix.
    maskSuffix []byte

    // If filterMax is >0, this iterator will filter out any keys whose
    // suffixes don't fall within the range [filterMin, filterMax).
    // Additionally, the iterator will be constructed with a block-property
    // filter that filters out blocks accordingly. Not all OPTIONS hook up the
    // corresponding block-property collector, so block filtering may still be
    // effectively disabled in some runs. The iterator operations themselves,
    // however, will always skip past any points that should be filtered to
    // ensure determinism.
    filterMin uint64
    filterMax uint64

    // See IterOptions.UseL6Filters.
    useL6Filters bool

    // NB: If adding or removing fields, ensure IsZero is in sync.
}
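
// For illustration, an iterator intended to surface only keys whose suffixes
// fall in [5, 10), with L6 filters enabled, could be described as follows
// (the values are chosen arbitrarily):
//
//	opts := iterOpts{
//		filterMin:    5,
//		filterMax:    10,
//		useL6Filters: true,
//	}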

func (o iterOpts) IsZero() bool {
    return o.lower == nil && o.upper == nil && o.keyTypes == 0 &&
        o.maskSuffix == nil && o.filterMin == 0 && o.filterMax == 0 && !o.useL6Filters
}

// GenerateOps generates n random operations, drawing randomness from the
// provided pseudorandom generator and using cfg to determine the distribution
// of op types.
func GenerateOps(rng *rand.Rand, n uint64, cfg OpConfig) Ops {
    // Generate a new set of random ops, writing them to <dir>/ops. These will
    // be read by the child processes when performing a test run.
    return generate(rng, n, cfg, newKeyManager(1 /* num instances */))
}
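
// A minimal usage sketch (the seed and op count are arbitrary, and how cfg is
// obtained depends on the caller):
//
//	rng := rand.New(rand.NewSource(seed))
//	ops := GenerateOps(rng, 10000, cfg)
//	_ = formatOps(ops) // serialized form, as written to <dir>/ops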

type generator struct {
    cfg OpConfig
    rng *rand.Rand

    init *initOp
    ops  []op

    // keyManager tracks the state of keys at operation generation time.
    keyManager   *keyManager
    keyGenerator *keyGenerator
    dbs          objIDSlice
    // Unordered sets of object IDs for live objects. Used to randomly select
    // an object when generating an operation. There are 4 concrete objects:
    // the DB (of which there is exactly 1 per instance), batches, iterators,
    // and snapshots.
    //
    // liveBatches contains the live indexed and write-only batches.
    liveBatches objIDSlice
    // liveIters contains the live iterators.
    liveIters     objIDSlice
    itersLastOpts map[objID]iterOpts
    // liveReaders contains the DBs, and any live indexed batches and
    // snapshots. The DBs are always at the start.
    liveReaders objIDSlice
    // liveSnapshots contains the live snapshots.
    liveSnapshots objIDSlice
    // liveWriters contains the DBs, and any live batches. The DBs are always
    // at the start.
    liveWriters objIDSlice
    // externalObjects contains the external objects created.
    externalObjects objIDSlice

    // Maps used to find associated objects during generation. These maps are
    // not needed during test execution.
    //
    // batchID -> batch iters: used to keep track of the open iterators on an
    // indexed batch. The iter set value will also be indexed by the readers
    // map.
    batches map[objID]objIDSet
    // iterID -> reader iters: used to keep track of all of the open
    // iterators. The iter set value will also be indexed by either the
    // batches or snapshots maps.
    iters map[objID]objIDSet
    // objectID -> db: used to keep track of the DB a batch, iter, or snapshot
    // was created on.
    objDB map[objID]objID
    // readerID -> reader iters: used to keep track of the open iterators on a
    // reader. The iter set value will also be indexed by either the batches
    // or snapshots maps. This map is the union of the batches and snapshots
    // maps.
    readers map[objID]objIDSet
    // snapshotID -> snapshot iters: used to keep track of the open iterators
    // on a snapshot. The iter set value will also be indexed by the readers
    // map.
    snapshots map[objID]objIDSet
    // snapshotID -> bounds of the snapshot: only populated for snapshots that
    // are constrained by bounds.
    snapshotBounds map[objID][]pebble.KeyRange
    // iterCreationTimestamp is the metaTimestamp at which the iter was
    // created.
    iterCreationTimestamp map[objID]int
    // iterReaderID is a map from an iterID to a readerID.
    iterReaderID map[objID]objID
}

func newGenerator(rng *rand.Rand, cfg OpConfig, km *keyManager) *generator {
    keyGenerator := newKeyGenerator(km, rng, cfg)
    g := &generator{
        cfg:                   cfg,
        rng:                   rng,
        init:                  &initOp{dbSlots: uint32(cfg.numInstances)},
        keyManager:            km,
        keyGenerator:          keyGenerator,
        liveReaders:           objIDSlice{makeObjID(dbTag, 1)},
        liveWriters:           objIDSlice{makeObjID(dbTag, 1)},
        dbs:                   objIDSlice{makeObjID(dbTag, 1)},
        objDB:                 make(map[objID]objID),
        batches:               make(map[objID]objIDSet),
        iters:                 make(map[objID]objIDSet),
        readers:               make(map[objID]objIDSet),
        snapshots:             make(map[objID]objIDSet),
        snapshotBounds:        make(map[objID][]pebble.KeyRange),
        itersLastOpts:         make(map[objID]iterOpts),
        iterCreationTimestamp: make(map[objID]int),
        iterReaderID:          make(map[objID]objID),
    }
    for i := 1; i < cfg.numInstances; i++ {
        g.liveReaders = append(g.liveReaders, makeObjID(dbTag, uint32(i+1)))
        g.liveWriters = append(g.liveWriters, makeObjID(dbTag, uint32(i+1)))
        g.dbs = append(g.dbs, makeObjID(dbTag, uint32(i+1)))
    }
    // Note that the initOp fields are populated during generation.
    g.ops = append(g.ops, g.init)
    return g
}

func generate(rng *rand.Rand, count uint64, cfg OpConfig, km *keyManager) []op {
    g := newGenerator(rng, cfg, km)

    opGenerators := []func(){
        OpBatchAbort:                  g.batchAbort,
        OpBatchCommit:                 g.batchCommit,
        OpDBCheckpoint:                g.dbCheckpoint,
        OpDBCompact:                   g.dbCompact,
        OpDBDownload:                  g.dbDownload,
        OpDBFlush:                     g.dbFlush,
        OpDBRatchetFormatMajorVersion: g.dbRatchetFormatMajorVersion,
        OpDBRestart:                   g.dbRestart,
        OpIterClose:                   g.randIter(g.iterClose),
        OpIterFirst:                   g.randIter(g.iterFirst),
        OpIterLast:                    g.randIter(g.iterLast),
        OpIterNext:                    g.randIter(g.iterNext),
        OpIterNextWithLimit:           g.randIter(g.iterNextWithLimit),
        OpIterNextPrefix:              g.randIter(g.iterNextPrefix),
        OpIterCanSingleDelete:         g.randIter(g.iterCanSingleDelete),
        OpIterPrev:                    g.randIter(g.iterPrev),
        OpIterPrevWithLimit:           g.randIter(g.iterPrevWithLimit),
        OpIterSeekGE:                  g.randIter(g.iterSeekGE),
        OpIterSeekGEWithLimit:         g.randIter(g.iterSeekGEWithLimit),
        OpIterSeekLT:                  g.randIter(g.iterSeekLT),
        OpIterSeekLTWithLimit:         g.randIter(g.iterSeekLTWithLimit),
        OpIterSeekPrefixGE:            g.randIter(g.iterSeekPrefixGE),
        OpIterSetBounds:               g.randIter(g.iterSetBounds),
        OpIterSetOptions:              g.randIter(g.iterSetOptions),
        OpNewBatch:                    g.newBatch,
        OpNewIndexedBatch:             g.newIndexedBatch,
        OpNewIter:                     g.newIter,
        OpNewIterUsingClone:           g.newIterUsingClone,
        OpNewSnapshot:                 g.newSnapshot,
        OpNewExternalObj:              g.newExternalObj,
        OpReaderGet:                   g.readerGet,
        OpReplicate:                   g.replicate,
        OpSnapshotClose:               g.snapshotClose,
        OpWriterApply:                 g.writerApply,
        OpWriterDelete:                g.writerDelete,
        OpWriterDeleteRange:           g.writerDeleteRange,
        OpWriterIngest:                g.writerIngest,
        OpWriterIngestAndExcise:       g.writerIngestAndExcise,
        OpWriterIngestExternalFiles:   g.writerIngestExternalFiles,
        OpWriterLogData:               g.writerLogData,
        OpWriterMerge:                 g.writerMerge,
        OpWriterRangeKeyDelete:        g.writerRangeKeyDelete,
        OpWriterRangeKeySet:           g.writerRangeKeySet,
        OpWriterRangeKeyUnset:         g.writerRangeKeyUnset,
        OpWriterSet:                   g.writerSet,
        OpWriterSingleDelete:          g.writerSingleDelete,
    }

    // TPCC-style deck-of-cards randomization. Every time the end of the deck
    // is reached, we shuffle the deck.
    deck := randvar.NewDeck(g.rng, cfg.ops[:]...)

    defer func() {
        if r := recover(); r != nil {
            fmt.Fprintln(os.Stderr, formatOps(g.ops))
            panic(r)
        }
    }()
    for i := uint64(0); i < count; i++ {
        opGenerators[deck.Int()]()
    }

    g.dbClose()

    computeDerivedFields(g.ops)
    return g.ops
}
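
// The deck above is how op frequencies are controlled: cfg.ops supplies one
// weight per op type, and the deck deals op-type indices with exactly those
// relative frequencies per pass, reshuffling once exhausted. A minimal sketch
// of the idea (hypothetical type, not the actual randvar.Deck implementation):
//
//	type deck struct {
//		rng   *rand.Rand
//		cards []int // op-type indices, repeated per their weights
//		next  int
//	}
//
//	func (d *deck) Int() int {
//		if d.next == len(d.cards) {
//			d.rng.Shuffle(len(d.cards), func(i, j int) {
//				d.cards[i], d.cards[j] = d.cards[j], d.cards[i]
//			})
//			d.next = 0
//		}
//		c := d.cards[d.next]
//		d.next++
//		return c
//	}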

func (g *generator) add(op op) {
    g.ops = append(g.ops, op)
    g.keyManager.update(op)
}

// prefixKeyRange generates a [start, end) pair consisting of two prefix keys.
func (g *generator) prefixKeyRange() ([]byte, []byte) {
    keys := g.keyGenerator.UniqueKeys(2, func() []byte { return g.keyGenerator.RandPrefix(0.01) })
    return keys[0], keys[1]
}

func (g *generator) randKeyToSingleDelete(id objID) []byte {
    keys := g.keyManager.eligibleSingleDeleteKeys(id)
    length := len(keys)
    if length == 0 {
        return nil
    }
    return keys[g.rng.Intn(length)]
}

func resizeBuffer(buf []byte, prefixLen, suffixLen int) []byte {
    if cap(buf) >= prefixLen+suffixLen {
        return buf[:prefixLen+suffixLen]
    }
    return make([]byte, prefixLen+suffixLen)
}
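
// For example (illustrative lengths):
//
//	buf := make([]byte, 0, 32)
//	b := resizeBuffer(buf, 10, 4) // cap 32 >= 14: reuses buf's array, len(b) == 14
//	b = resizeBuffer(b, 30, 10)   // cap 32 < 40: allocates a fresh buffer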

func (g *generator) newBatch() {
    batchID := makeObjID(batchTag, g.init.batchSlots)
    g.init.batchSlots++
    g.liveBatches = append(g.liveBatches, batchID)
    g.liveWriters = append(g.liveWriters, batchID)
    dbID := g.dbs.rand(g.rng)
    g.objDB[batchID] = dbID

    g.add(&newBatchOp{
        dbID:    dbID,
        batchID: batchID,
    })
}

func (g *generator) newIndexedBatch() {
    batchID := makeObjID(batchTag, g.init.batchSlots)
    g.init.batchSlots++
    g.liveBatches = append(g.liveBatches, batchID)
    g.liveReaders = append(g.liveReaders, batchID)
    g.liveWriters = append(g.liveWriters, batchID)

    iters := make(objIDSet)
    g.batches[batchID] = iters
    g.readers[batchID] = iters
    dbID := g.dbs.rand(g.rng)
    g.objDB[batchID] = dbID

    g.add(&newIndexedBatchOp{
        dbID:    dbID,
        batchID: batchID,
    })
}

// removeBatchFromGenerator does not generate a closeOp for the target batch,
// as not every batch that is removed from the generator should be closed. For
// example, running a closeOp before an ingestOp that contains the closed
// batch will cause an error.
func (g *generator) removeBatchFromGenerator(batchID objID) {
    g.liveBatches.remove(batchID)
    iters := g.batches[batchID]
    delete(g.batches, batchID)

    if iters != nil {
        g.liveReaders.remove(batchID)
        delete(g.readers, batchID)
    }
    g.liveWriters.remove(batchID)
    for _, id := range iters.sorted() {
        g.liveIters.remove(id)
        delete(g.iters, id)
        g.add(&closeOp{objID: id})
    }
}

func (g *generator) batchAbort() {
    if len(g.liveBatches) == 0 {
        return
    }

    batchID := g.liveBatches.rand(g.rng)
    g.removeBatchFromGenerator(batchID)

    g.add(&closeOp{objID: batchID})
}

func (g *generator) batchCommit() {
    if len(g.liveBatches) == 0 {
        return
    }

    batchID := g.liveBatches.rand(g.rng)
    dbID := g.objDB[batchID]
    g.removeBatchFromGenerator(batchID)

    // The batch we're applying may contain single delete tombstones that,
    // when applied to the writer, result in nondeterminism for the deleted
    // key. If that's the case, we can restore determinism by first deleting
    // the key from the writer.
    //
    // Generating additional operations here is not ideal, but it simplifies
    // single delete invariants significantly.
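    //
    // For example, if the DB already contains SET(k, v1) and the batch
    // contains SET(k, v2) followed by SINGLEDEL(k), the single delete is only
    // guaranteed to cancel the most recent set; depending on how the internal
    // keys have been compacted, v1 may resurface. Deleting k in the DB first
    // makes the final state deterministic.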
    singleDeleteConflicts := g.keyManager.checkForSingleDelConflicts(batchID, dbID, false /* collapsed */)
    for _, conflict := range singleDeleteConflicts {
        g.add(&deleteOp{
            writerID:    dbID,
            key:         conflict,
            derivedDBID: dbID,
        })
    }

    g.add(&batchCommitOp{
        dbID:    dbID,
        batchID: batchID,
    })
    g.add(&closeOp{objID: batchID})
}

func (g *generator) dbClose() {
    // Close any live iterators and snapshots, so that we can close the DB
    // cleanly.
    for len(g.liveIters) > 0 {
        g.randIter(g.iterClose)()
    }
    for len(g.liveSnapshots) > 0 {
        g.snapshotClose()
    }
    for len(g.liveBatches) > 0 {
        batchID := g.liveBatches[0]
        g.removeBatchFromGenerator(batchID)
        g.add(&closeOp{objID: batchID})
    }
    for len(g.dbs) > 0 {
        db := g.dbs[0]
        g.dbs = g.dbs[1:]
        g.add(&closeOp{objID: db})
    }
}

func (g *generator) dbCheckpoint() {
    numSpans := g.expRandInt(1)
    var spans []pebble.CheckpointSpan
    if numSpans > 0 {
        spans = make([]pebble.CheckpointSpan, numSpans)
    }
    for i := range spans {
        start := g.keyGenerator.RandKey(0.01)
        end := g.keyGenerator.RandKey(0.01)
        if g.cmp(start, end) > 0 {
            start, end = end, start
        }
        spans[i].Start = start
        spans[i].End = end
    }
    dbID := g.dbs.rand(g.rng)
    g.add(&checkpointOp{
        dbID:  dbID,
        spans: spans,
    })
}

func (g *generator) dbCompact() {
    // Generate new key(s) with a 1% probability.
    start := g.keyGenerator.RandKey(0.01)
    end := g.keyGenerator.RandKey(0.01)
    if g.cmp(start, end) > 0 {
        start, end = end, start
    }
    dbID := g.dbs.rand(g.rng)
    g.add(&compactOp{
        dbID:        dbID,
        start:       start,
        end:         end,
        parallelize: g.rng.Float64() < 0.5,
    })
}

func (g *generator) dbDownload() {
    numSpans := 1 + g.expRandInt(1)
    spans := make([]pebble.DownloadSpan, numSpans)
    for i := range spans {
        keys := g.keyGenerator.UniqueKeys(2, func() []byte { return g.keyGenerator.RandKey(0.001) })
        start, end := keys[0], keys[1]
        spans[i].StartKey = start
        spans[i].EndKey = end
        spans[i].ViaBackingFileDownload = g.rng.Intn(2) == 0
    }
    dbID := g.dbs.rand(g.rng)
    g.add(&downloadOp{
        dbID:  dbID,
        spans: spans,
    })
}

func (g *generator) dbFlush() {
    g.add(&flushOp{g.dbs.rand(g.rng)})
}

func (g *generator) dbRatchetFormatMajorVersion() {
    // Ratchet to a random format major version between the minimum the
    // metamorphic tests support and the newest. At runtime, the generated
    // version may be behind the database's format major version, in which
    // case RatchetFormatMajorVersion should deterministically error.
    dbID := g.dbs.rand(g.rng)
    n := int(newestFormatMajorVersionToTest - minimumFormatMajorVersion)
    vers := pebble.FormatMajorVersion(g.rng.Intn(n+1)) + minimumFormatMajorVersion
    g.add(&dbRatchetFormatMajorVersionOp{dbID: dbID, vers: vers})
}

func (g *generator) dbRestart() {
    // Close any live iterators and snapshots, so that we can close the DB
    // cleanly.
    dbID := g.dbs.rand(g.rng)
    for len(g.liveIters) > 0 {
        g.randIter(g.iterClose)()
    }
    for len(g.liveSnapshots) > 0 {
        g.snapshotClose()
    }
    // Close the batches.
    for len(g.liveBatches) > 0 {
        batchID := g.liveBatches[0]
        g.removeBatchFromGenerator(batchID)
        g.add(&closeOp{objID: batchID})
    }
    if len(g.liveReaders) != len(g.dbs) || len(g.liveWriters) != len(g.dbs) {
        panic(fmt.Sprintf("unexpected counts: liveReaders %d, liveWriters: %d",
            len(g.liveReaders), len(g.liveWriters)))
    }
    g.add(&dbRestartOp{dbID: dbID})
}

// maybeSetSnapshotIterBounds must be called whenever creating a new iterator
// or modifying the bounds of an iterator. If the iterator is backed by a
// snapshot that only guarantees consistency within a limited set of key
// spans, then the iterator must set bounds within one of the snapshot's
// consistent key spans. It returns true if the provided readerID is a bounded
// snapshot and bounds were set.
func (g *generator) maybeSetSnapshotIterBounds(readerID objID, opts *iterOpts) bool {
    snapBounds, isBoundedSnapshot := g.snapshotBounds[readerID]
    if !isBoundedSnapshot {
        return false
    }
    // Pick a random key range within one of the snapshot's key ranges.
    parentBounds := pickOneUniform(g.rng, snapBounds)
    // With 10% probability, use the parent start bound as-is.
    if g.rng.Float64() <= 0.1 {
        opts.lower = parentBounds.Start
    } else {
        opts.lower = g.keyGenerator.RandKeyInRange(0.1, parentBounds)
    }
    // With 10% probability, use the parent end bound as-is.
    if g.rng.Float64() <= 0.1 {
        opts.upper = parentBounds.End
    } else {
        opts.upper = g.keyGenerator.RandKeyInRange(0.1, pebble.KeyRange{
            Start: opts.lower,
            End:   parentBounds.End,
        })
    }
    return true
}

func (g *generator) newIter() {
    iterID := makeObjID(iterTag, g.init.iterSlots)
    g.init.iterSlots++
    g.liveIters = append(g.liveIters, iterID)

    readerID := g.liveReaders.rand(g.rng)
    if iters := g.readers[readerID]; iters != nil {
        iters[iterID] = struct{}{}
        g.iters[iterID] = iters
        //lint:ignore SA9003 - readability
    } else {
        // NB: the DB object does not track its open iterators because it
        // never closes.
    }
    g.iterReaderID[iterID] = readerID
    dbID := g.deriveDB(iterID)

    var opts iterOpts
    if !g.maybeSetSnapshotIterBounds(readerID, &opts) {
        // Generate lower/upper bounds with a 10% probability.
        if g.rng.Float64() <= 0.1 {
            // Generate a new key with a .1% probability.
            opts.lower = g.keyGenerator.RandKey(0.001)
        }
        if g.rng.Float64() <= 0.1 {
            // Generate a new key with a .1% probability.
            opts.upper = g.keyGenerator.RandKey(0.001)
        }
        if g.cmp(opts.lower, opts.upper) > 0 {
            opts.lower, opts.upper = opts.upper, opts.lower
        }
    }
    opts.keyTypes, opts.maskSuffix = g.randKeyTypesAndMask()

    // With 10% probability, enable automatic filtering of keys with suffixes
    // not in the provided range. This filtering occurs both through
    // block-property filtering and explicitly within the iterator operations
    // to ensure determinism.
    if g.rng.Float64() <= 0.1 {
        opts.filterMin = uint64(g.keyGenerator.UniformSuffixInt() + 1)
        opts.filterMax = uint64(g.keyGenerator.UniformSuffixInt() + 1)
        if opts.filterMin > opts.filterMax {
            opts.filterMin, opts.filterMax = opts.filterMax, opts.filterMin
        } else if opts.filterMin == opts.filterMax {
            opts.filterMax++
        }
    }

    // Enable L6 filters with a 10% probability.
    if g.rng.Float64() <= 0.1 {
        opts.useL6Filters = true
    }

    g.itersLastOpts[iterID] = opts
    g.iterCreationTimestamp[iterID] = g.keyManager.nextMetaTimestamp()
    g.iterReaderID[iterID] = readerID
    g.add(&newIterOp{
        readerID:    readerID,
        iterID:      iterID,
        iterOpts:    opts,
        derivedDBID: dbID,
    })
}

func (g *generator) randKeyTypesAndMask() (keyTypes uint32, maskSuffix []byte) {
    // Iterate over different key types.
    p := g.rng.Float64()
    switch {
    case p < 0.2: // 20% probability
        keyTypes = uint32(pebble.IterKeyTypePointsOnly)
    case p < 0.8: // 60% probability
        keyTypes = uint32(pebble.IterKeyTypePointsAndRanges)
        // With 50% probability, enable masking.
        if g.rng.Intn(2) == 1 {
            maskSuffix = g.keyGenerator.UniformSuffix()
        }
    default: // 20% probability
        keyTypes = uint32(pebble.IterKeyTypeRangesOnly)
    }
    return keyTypes, maskSuffix
}

func (g *generator) deriveDB(readerID objID) objID {
    if readerID.tag() == iterTag {
        readerID = g.iterReaderID[readerID]
    }
    dbParentID := readerID
    if dbParentID.tag() != dbTag {
        dbParentID = g.objDB[dbParentID]
    }
    g.objDB[readerID] = dbParentID
    return dbParentID
}

func (g *generator) newIterUsingClone() {
    if len(g.liveIters) == 0 {
        return
    }
    existingIterID := g.liveIters.rand(g.rng)
    iterID := makeObjID(iterTag, g.init.iterSlots)
    g.init.iterSlots++
    g.liveIters = append(g.liveIters, iterID)
    if iters := g.iters[existingIterID]; iters != nil {
        iters[iterID] = struct{}{}
        g.iters[iterID] = iters
        //lint:ignore SA9003 - readability
    } else {
        // NB: the DB object does not track its open iterators because it
        // never closes.
    }
    readerID := g.iterReaderID[existingIterID]
    g.iterReaderID[iterID] = readerID
    g.deriveDB(iterID)

    var refreshBatch bool
    if readerID.tag() == batchTag {
        refreshBatch = g.rng.Intn(2) == 1
    }

    opts := g.itersLastOpts[existingIterID]
    // With 50% probability, consider modifying the iterator options used by
    // the clone.
    if g.rng.Intn(2) == 1 {
        g.maybeMutateOptions(readerID, &opts)
    }
    g.itersLastOpts[iterID] = opts

    g.iterCreationTimestamp[iterID] = g.keyManager.nextMetaTimestamp()
    g.iterReaderID[iterID] = g.iterReaderID[existingIterID]
    g.add(&newIterUsingCloneOp{
        existingIterID:  existingIterID,
        iterID:          iterID,
        refreshBatch:    refreshBatch,
        iterOpts:        opts,
        derivedReaderID: readerID,
    })
}

func (g *generator) iterClose(iterID objID) {
    g.liveIters.remove(iterID)
    if readerIters, ok := g.iters[iterID]; ok {
        delete(g.iters, iterID)
        delete(readerIters, iterID)
    }

    g.add(&closeOp{objID: iterID})
}

func (g *generator) iterSetBounds(iterID objID) {
    iterLastOpts := g.itersLastOpts[iterID]
    newOpts := iterLastOpts
    // TODO(jackson): The logic to increase the probability of advancing
    // bounds monotonically only applies if the snapshot is not bounded.
    // Refactor to allow bounded snapshots to benefit too, when possible.
    if !g.maybeSetSnapshotIterBounds(g.iterReaderID[iterID], &newOpts) {
        var lower, upper []byte
        genLower := g.rng.Float64() <= 0.9
        genUpper := g.rng.Float64() <= 0.9
        // When one of ensureLowerGE, ensureUpperLE is true, the new bounds
        // don't overlap with the previous bounds.
        var ensureLowerGE, ensureUpperLE bool
        if genLower && iterLastOpts.upper != nil && g.rng.Float64() <= 0.9 {
            ensureLowerGE = true
        }
        if (!ensureLowerGE || g.rng.Float64() < 0.5) && genUpper && iterLastOpts.lower != nil {
            ensureUpperLE = true
            ensureLowerGE = false
        }
        attempts := 0
        for {
            attempts++
            if genLower {
                // Generate a new key with a .1% probability.
                lower = g.keyGenerator.RandKey(0.001)
            }
            if genUpper {
                // Generate a new key with a .1% probability.
                upper = g.keyGenerator.RandKey(0.001)
            }
            if g.cmp(lower, upper) > 0 {
                lower, upper = upper, lower
            }
            if ensureLowerGE && g.cmp(iterLastOpts.upper, lower) > 0 {
                if attempts < 25 {
                    continue
                }
                lower = iterLastOpts.upper
                upper = lower
                break
            }
            if ensureUpperLE && g.cmp(upper, iterLastOpts.lower) > 0 {
                if attempts < 25 {
                    continue
                }
                upper = iterLastOpts.lower
                lower = upper
                break
            }
            break
        }
        newOpts.lower = lower
        newOpts.upper = upper
    }
    g.itersLastOpts[iterID] = newOpts
    g.add(&iterSetBoundsOp{
        iterID: iterID,
        lower:  newOpts.lower,
        upper:  newOpts.upper,
    })
    // Additionally seek the iterator in a manner consistent with the bounds,
    // and do some steps (Next/Prev). The seeking exercises typical
    // CockroachDB behavior when using iterators and the steps are trying to
    // stress the region near the bounds. Ideally, we should not do this as
    // part of generating a single op, but this is easier than trying to
    // control future op generation via generator state.
    doSeekLT := newOpts.upper != nil && g.rng.Float64() < 0.5
    doSeekGE := newOpts.lower != nil && g.rng.Float64() < 0.5
    if doSeekLT && doSeekGE {
        // Pick the seek.
        if g.rng.Float64() < 0.5 {
            doSeekGE = false
        } else {
            doSeekLT = false
        }
    }
    if doSeekLT {
        g.add(&iterSeekLTOp{
            iterID:          iterID,
            key:             newOpts.upper,
            derivedReaderID: g.iterReaderID[iterID],
        })
        if g.rng.Float64() < 0.5 {
            g.iterNext(iterID)
        }
        if g.rng.Float64() < 0.5 {
            g.iterNext(iterID)
        }
        if g.rng.Float64() < 0.5 {
            g.iterPrev(iterID)
        }
    } else if doSeekGE {
        g.add(&iterSeekGEOp{
            iterID:          iterID,
            key:             newOpts.lower,
            derivedReaderID: g.iterReaderID[iterID],
        })
        if g.rng.Float64() < 0.5 {
            g.iterPrev(iterID)
        }
        if g.rng.Float64() < 0.5 {
            g.iterPrev(iterID)
        }
        if g.rng.Float64() < 0.5 {
            g.iterNext(iterID)
        }
    }
}

func (g *generator) iterSetOptions(iterID objID) {
    opts := g.itersLastOpts[iterID]
    g.maybeMutateOptions(g.iterReaderID[iterID], &opts)
    g.itersLastOpts[iterID] = opts
    g.add(&iterSetOptionsOp{
        iterID:          iterID,
        iterOpts:        opts,
        derivedReaderID: g.iterReaderID[iterID],
    })

    // Additionally, perform a random absolute positioning operation. The
    // SetOptions contract requires one before the next relative positioning
    // operation. Ideally, we should not do this as part of generating a
    // single op, but this is easier than trying to control future op
    // generation via generator state.
    pickOneUniform(
        g.rng,
        []func(objID){
            g.iterFirst,
            g.iterLast,
            g.iterSeekGE,
            g.iterSeekGEWithLimit,
            g.iterSeekPrefixGE,
            g.iterSeekLT,
            g.iterSeekLTWithLimit,
        },
    )(iterID)
}

func (g *generator) iterSeekGE(iterID objID) {
    g.add(&iterSeekGEOp{
        iterID:          iterID,
        key:             g.keyGenerator.RandKey(0.001), // 0.1% new keys
        derivedReaderID: g.iterReaderID[iterID],
    })
}

func (g *generator) iterSeekGEWithLimit(iterID objID) {
    // 0.1% new keys
    key, limit := g.keyGenerator.RandKey(0.001), g.keyGenerator.RandKey(0.001)
    if g.cmp(key, limit) > 0 {
        key, limit = limit, key
    }
    g.add(&iterSeekGEOp{
        iterID:          iterID,
        key:             key,
        limit:           limit,
        derivedReaderID: g.iterReaderID[iterID],
    })
}

func (g *generator) iterSeekPrefixGE(iterID objID) {
    lower := g.itersLastOpts[iterID].lower
    upper := g.itersLastOpts[iterID].upper
    iterCreationTimestamp := g.iterCreationTimestamp[iterID]
    var key []byte

    // We try to make sure that the SeekPrefixGE key is within the iter
    // bounds, and that the iter can read the key. If the key was created on a
    // batch which deleted the key, then the key will still be considered
    // visible by the current logic. We're also not accounting for keys
    // written to batches which haven't been persisted to the DB. But we're
    // only picking keys in a best-effort manner, and the logic is better than
    // picking a random key.
    if g.rng.Intn(10) >= 1 {
        possibleKeys := make([][]byte, 0, 100)
        inRangeKeys := g.keyManager.InRangeKeysForObj(g.objDB[iterID], lower, upper)
        for _, keyMeta := range inRangeKeys {
            visibleHistory := keyMeta.history.before(iterCreationTimestamp)

            // Check if the last op on this key set a value (e.g. SETs,
            // MERGEs). If so, the key should be visible to the iterator and
            // makes a good candidate for a SeekPrefixGE.
            if visibleHistory.hasVisibleValue() {
                possibleKeys = append(possibleKeys, keyMeta.key)
            }
        }

        if len(possibleKeys) > 0 {
            key = possibleKeys[g.rng.Int31n(int32(len(possibleKeys)))]
        }
    }

    if key == nil {
        // TODO(bananabrick): We should try and use keys within the bounds,
        // even if we couldn't find any keys visible to the iterator. However,
        // doing this in experiments didn't really increase the valid
        // SeekPrefixGE calls by much.
        key = g.keyGenerator.RandKey(0) // 0% new keys
    }

    g.add(&iterSeekPrefixGEOp{
        iterID:          iterID,
        key:             key,
        derivedReaderID: g.iterReaderID[iterID],
    })
}

func (g *generator) iterSeekLT(iterID objID) {
    g.add(&iterSeekLTOp{
        iterID:          iterID,
        key:             g.keyGenerator.RandKey(0.001), // 0.1% new keys
        derivedReaderID: g.iterReaderID[iterID],
    })
}

func (g *generator) iterSeekLTWithLimit(iterID objID) {
    // 0.1% new keys
    key, limit := g.keyGenerator.RandKey(0.001), g.keyGenerator.RandKey(0.001)
    if g.cmp(limit, key) > 0 {
        key, limit = limit, key
    }
    g.add(&iterSeekLTOp{
        iterID:          iterID,
        key:             key,
        limit:           limit,
        derivedReaderID: g.iterReaderID[iterID],
    })
}

// randIter performs partial func application ("currying"), returning a new
// function that supplies the given func with a random iterator.
func (g *generator) randIter(gen func(objID)) func() {
    return func() {
        if len(g.liveIters) == 0 {
            return
        }
        gen(g.liveIters.rand(g.rng))
    }
}
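
// For example, in the opGenerators table in generate, OpIterSeekGE is
// registered as g.randIter(g.iterSeekGE). Each invocation picks a random live
// iterator, if any, and applies the wrapped generator to it:
//
//	seekRandomIter := g.randIter(g.iterSeekGE)
//	seekRandomIter() // no-op when no iterators are live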

func (g *generator) iterFirst(iterID objID) {
    g.add(&iterFirstOp{
        iterID:          iterID,
        derivedReaderID: g.iterReaderID[iterID],
    })
}

func (g *generator) iterLast(iterID objID) {
    g.add(&iterLastOp{
        iterID:          iterID,
        derivedReaderID: g.iterReaderID[iterID],
    })
}

func (g *generator) iterNext(iterID objID) {
    g.add(&iterNextOp{
        iterID:          iterID,
        derivedReaderID: g.iterReaderID[iterID],
    })
}

func (g *generator) iterPrev(iterID objID) {
    g.add(&iterPrevOp{
        iterID:          iterID,
        derivedReaderID: g.iterReaderID[iterID],
    })
}

func (g *generator) iterNextWithLimit(iterID objID) {
    g.add(&iterNextOp{
        iterID:          iterID,
        limit:           g.keyGenerator.RandKey(0.001), // 0.1% new keys
        derivedReaderID: g.iterReaderID[iterID],
    })
}

func (g *generator) iterNextPrefix(iterID objID) {
    g.add(&iterNextPrefixOp{
        iterID:          iterID,
        derivedReaderID: g.iterReaderID[iterID],
    })
}

func (g *generator) iterCanSingleDelete(iterID objID) {
    g.add(&iterCanSingleDelOp{
        iterID:          iterID,
        derivedReaderID: g.iterReaderID[iterID],
    })
}

func (g *generator) iterPrevWithLimit(iterID objID) {
    g.add(&iterPrevOp{
        iterID:          iterID,
        limit:           g.keyGenerator.RandKey(0.001), // 0.1% new keys
        derivedReaderID: g.iterReaderID[iterID],
    })
}

func (g *generator) readerGet() {
    if len(g.liveReaders) == 0 {
        return
    }

    readerID := g.liveReaders.rand(g.rng)

    // If the chosen reader is a snapshot created with user-specified key
    // ranges, restrict the read to fall within one of the provided key
    // ranges.
    var key []byte
    if bounds := g.snapshotBounds[readerID]; len(bounds) > 0 {
        kr := bounds[g.rng.Intn(len(bounds))]
        key = g.keyGenerator.RandKeyInRange(0.001, kr) // 0.1% new keys
    } else {
        key = g.keyGenerator.RandKey(0.001) // 0.1% new keys
    }
    derivedDBID := objID(0)
    if readerID.tag() == batchTag || readerID.tag() == snapTag {
        derivedDBID = g.deriveDB(readerID)
    }
    g.add(&getOp{readerID: readerID, key: key, derivedDBID: derivedDBID})
}

func (g *generator) replicate() {
    if len(g.dbs) < 2 {
        return
    }

    source := g.dbs.rand(g.rng)
    dest := source
    for dest == source {
        dest = g.dbs.rand(g.rng)
    }

    startKey, endKey := g.prefixKeyRange()
    g.add(&replicateOp{
        source: source,
        dest:   dest,
        start:  startKey,
        end:    endKey,
    })
}

// generateDisjointKeyRanges generates n disjoint key ranges.
func (g *generator) generateDisjointKeyRanges(n int) []pebble.KeyRange {
    keys := g.keyGenerator.UniqueKeys(2*n, func() []byte { return g.keyGenerator.RandPrefix(0.1) })
    keyRanges := make([]pebble.KeyRange, n)
    for i := range keyRanges {
        keyRanges[i] = pebble.KeyRange{
            Start: keys[i*2],
            End:   keys[i*2+1],
        }
    }
    return keyRanges
}
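
// Disjointness falls out of UniqueKeys: assuming it returns 2n distinct
// prefixes in sorted order, pairing consecutive keys yields ranges that
// cannot overlap. For example, with n=2 and keys a, c, f, k, the resulting
// ranges are [a, c) and [f, k).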

func (g *generator) newSnapshot() {
    snapID := makeObjID(snapTag, g.init.snapshotSlots)
    g.init.snapshotSlots++
    g.liveSnapshots = append(g.liveSnapshots, snapID)
    g.liveReaders = append(g.liveReaders, snapID)
    dbID := g.dbs.rand(g.rng)
    g.objDB[snapID] = dbID

    iters := make(objIDSet)
    g.snapshots[snapID] = iters
    g.readers[snapID] = iters

    s := &newSnapshotOp{
        dbID:   dbID,
        snapID: snapID,
    }

    // Impose bounds on the keys that may be read with the snapshot. Setting
    // bounds allows some runs of the metamorphic test to use an
    // EventuallyFileOnlySnapshot instead of a Snapshot, testing equivalence
    // between the two for reads within those bounds.
    s.bounds = g.generateDisjointKeyRanges(
        1 + g.expRandInt(3),
    )
    g.snapshotBounds[snapID] = s.bounds
    g.add(s)
}

func (g *generator) snapshotClose() {
    if len(g.liveSnapshots) == 0 {
        return
    }

    snapID := g.liveSnapshots.rand(g.rng)
    g.liveSnapshots.remove(snapID)
    iters := g.snapshots[snapID]
    delete(g.snapshots, snapID)
    g.liveReaders.remove(snapID)
    delete(g.readers, snapID)

    for _, id := range iters.sorted() {
        g.liveIters.remove(id)
        delete(g.iters, id)
        g.add(&closeOp{objID: id})
    }

    g.add(&closeOp{objID: snapID})
}

func (g *generator) newExternalObj() {
    if len(g.liveBatches) == 0 {
        return
    }
    var batchID objID
    // Try to find a suitable batch.
    for i := 0; ; i++ {
        if i == 10 {
            return
        }
        batchID = g.liveBatches.rand(g.rng)
        okm := g.keyManager.objKeyMeta(batchID)
        // #3287: IngestExternalFiles currently doesn't support range keys.
        if !okm.bounds.IsUnset() && !okm.hasRangeKeys {
            break
        }
    }
    g.removeBatchFromGenerator(batchID)
    objID := makeObjID(externalObjTag, g.init.externalObjSlots)
    g.init.externalObjSlots++
    g.externalObjects = append(g.externalObjects, objID)
    g.add(&newExternalObjOp{
        batchID:       batchID,
        externalObjID: objID,
    })
}

func (g *generator) writerApply() {
    if len(g.liveBatches) == 0 {
        return
    }
    if len(g.liveWriters) < 2 {
        panic(fmt.Sprintf("insufficient liveWriters (%d) to apply batch", len(g.liveWriters)))
    }

    batchID := g.liveBatches.rand(g.rng)
    dbID := g.objDB[batchID]

    var writerID objID
    for {
        // NB: The writer we're applying to, as well as the batch we're
        // applying, must be from the same DB. The writer could be the DB
        // itself. Applying a batch from one DB on another DB results in a
        // panic, so avoid that.
        writerID = g.liveWriters.rand(g.rng)
        writerDBID := writerID
        if writerID.tag() != dbTag {
            writerDBID = g.objDB[writerID]
        }
        if writerID != batchID && writerDBID == dbID {
            break
        }
    }

    // The batch we're applying may contain single delete tombstones that,
    // when applied to the writer, result in nondeterminism for the deleted
    // key. If that's the case, we can restore determinism by first deleting
    // the key from the writer.
    //
    // Generating additional operations here is not ideal, but it simplifies
    // single delete invariants significantly.
    singleDeleteConflicts := g.keyManager.checkForSingleDelConflicts(batchID, writerID, false /* collapsed */)
    for _, conflict := range singleDeleteConflicts {
        g.add(&deleteOp{
            writerID:    writerID,
            key:         conflict,
            derivedDBID: dbID,
        })
    }

    g.removeBatchFromGenerator(batchID)

    g.add(&applyOp{
        writerID: writerID,
        batchID:  batchID,
    })
    g.add(&closeOp{
        objID: batchID,
    })
}

func (g *generator) writerDelete() {
    if len(g.liveWriters) == 0 {
        return
    }

    writerID := g.liveWriters.rand(g.rng)
    derivedDBID := writerID
    if derivedDBID.tag() != dbTag {
        derivedDBID = g.objDB[writerID]
    }
    g.add(&deleteOp{
        writerID:    writerID,
        key:         g.keyGenerator.RandKey(0.001), // 0.1% new keys
        derivedDBID: derivedDBID,
    })
}

func (g *generator) writerDeleteRange() {
    if len(g.liveWriters) == 0 {
        return
    }

    keys := g.keyGenerator.UniqueKeys(2, func() []byte { return g.keyGenerator.RandKey(0.001) })
    start, end := keys[0], keys[1]

    writerID := g.liveWriters.rand(g.rng)
    g.add(&deleteRangeOp{
        writerID: writerID,
        start:    start,
        end:      end,
    })
}

func (g *generator) writerRangeKeyDelete() {
    if len(g.liveWriters) == 0 {
        return
    }
    start, end := g.prefixKeyRange()

    writerID := g.liveWriters.rand(g.rng)
    g.add(&rangeKeyDeleteOp{
        writerID: writerID,
        start:    start,
        end:      end,
    })
}

func (g *generator) writerRangeKeySet() {
    if len(g.liveWriters) == 0 {
        return
    }
    start, end := g.prefixKeyRange()

    // 90% of the time, set a suffix.
    var suffix []byte
    if g.rng.Float64() < 0.90 {
        // Increase the max suffix 5% of the time.
        suffix = g.keyGenerator.SkewedSuffix(0.05)
    }

    writerID := g.liveWriters.rand(g.rng)
    g.add(&rangeKeySetOp{
        writerID: writerID,
        start:    start,
        end:      end,
        suffix:   suffix,
        value:    randBytes(g.rng, 0, maxValueSize),
    })
}

func (g *generator) writerRangeKeyUnset() {
    if len(g.liveWriters) == 0 {
        return
    }
    start, end := g.prefixKeyRange()

    // 90% of the time, set a suffix.
    var suffix []byte
    if g.rng.Float64() < 0.90 {
        // Increase the max suffix 5% of the time.
        suffix = g.keyGenerator.SkewedSuffix(0.05)
    }

    // TODO(jackson): Increase probability of effective unsets? Purely random
    // unsets are unlikely to remove an active range key.

    writerID := g.liveWriters.rand(g.rng)
    g.add(&rangeKeyUnsetOp{
        writerID: writerID,
        start:    start,
        end:      end,
        suffix:   suffix,
    })
}

func (g *generator) writerIngest() {
    if len(g.liveBatches) == 0 {
        return
    }

    dbID := g.dbs.rand(g.rng)
    n := min(1+g.expRandInt(1), len(g.liveBatches))
    batchIDs := make([]objID, n)
    derivedDBIDs := make([]objID, n)
    for i := 0; i < n; i++ {
        batchID := g.liveBatches.rand(g.rng)
        batchIDs[i] = batchID
        derivedDBIDs[i] = g.objDB[batchIDs[i]]
        g.removeBatchFromGenerator(batchID)
    }

    // Ingestions may fail if the ingested sstables overlap one another.
    // Either the ingestion succeeds and its keys are committed to the DB, or
    // it fails and the keys are not committed.
    if !g.keyManager.doObjectBoundsOverlap(batchIDs) {
        // This ingestion will succeed.
        //
        // The batches we're ingesting may contain single delete tombstones
        // that, when applied to the writer, result in nondeterminism for the
        // deleted key. If that's the case, we can restore determinism by
        // first deleting the keys from the writer.
        //
        // Generating additional operations here is not ideal, but it
        // simplifies single delete invariants significantly.
        for _, batchID := range batchIDs {
            singleDeleteConflicts := g.keyManager.checkForSingleDelConflicts(batchID, dbID, true /* collapsed */)
            for _, conflict := range singleDeleteConflicts {
                g.add(&deleteOp{
                    writerID:    dbID,
                    key:         conflict,
                    derivedDBID: dbID,
                })
            }
        }
    }
    g.add(&ingestOp{
        dbID:         dbID,
        batchIDs:     batchIDs,
        derivedDBIDs: derivedDBIDs,
    })
}

func (g *generator) writerIngestAndExcise() {
    if len(g.liveBatches) == 0 {
        return
    }

    dbID := g.dbs.rand(g.rng)
    batchID := g.liveBatches.rand(g.rng)
    g.removeBatchFromGenerator(batchID)

    start, end := g.prefixKeyRange()
    derivedDBID := g.objDB[batchID]

    // Check for any single delete conflicts. If this batch is single-deleting
    // a key that isn't safe to single delete in the underlying db, _and_ this
    // key is not in the excise span, we add a delete before the
    // ingestAndExcise.
    singleDeleteConflicts := g.keyManager.checkForSingleDelConflicts(batchID, dbID, true /* collapsed */)
    for _, conflict := range singleDeleteConflicts {
        if g.cmp(conflict, start) >= 0 && g.cmp(conflict, end) < 0 {
            // This key will get excised anyway.
            continue
        }
        g.add(&deleteOp{
            writerID:    dbID,
            key:         conflict,
            derivedDBID: dbID,
        })
    }

    g.add(&ingestAndExciseOp{
        dbID:                       dbID,
        batchID:                    batchID,
        derivedDBID:                derivedDBID,
        exciseStart:                start,
        exciseEnd:                  end,
        sstContainsExciseTombstone: g.rng.Intn(2) == 0,
    })
}

func (g *generator) writerIngestExternalFiles() {
    if len(g.externalObjects) == 0 {
        return
    }
    dbID := g.dbs.rand(g.rng)
    numFiles := 1 + g.expRandInt(1)
    objs := make([]externalObjWithBounds, numFiles)

    // We generate the parameters in multiple passes:
    //  1. Generate objs with random start and end keys. Their bounds can
    //     overlap.
    //  2. Sort objects by the start bound and trim the bounds to remove
    //     overlap.
    //  3. Remove any objects where the previous step resulted in empty
    //     bounds.
    //  4. Randomly add synthetic suffixes.

    for i := range objs {
        // We allow the same object to be selected multiple times.
        id := g.externalObjects.rand(g.rng)
        b := g.keyManager.objKeyMeta(id).bounds

        objStart := g.prefix(b.smallest)
        objEnd := g.prefix(b.largest)
        if !b.largestExcl || len(objEnd) != len(b.largest) {
            // Move up the end key a bit by appending a few letters to the
            // prefix.
            objEnd = append(objEnd, randBytes(g.rng, 1, 3)...)
        }
        if g.cmp(objStart, objEnd) >= 0 {
            panic("bug in generating obj bounds")
        }
        // Generate two random keys within the given bounds.
        // First, generate a start key in the range [objStart, objEnd).
        start := g.keyGenerator.RandKeyInRange(0.01, pebble.KeyRange{
            Start: objStart,
            End:   objEnd,
        })
        start = g.prefix(start)
        // Second, generate an end key in the range (start, objEnd]. To do
        // this, we generate a key in the range [start, objEnd) and if we get
        // `start`, we remap that to `objEnd`.
        end := g.keyGenerator.RandKeyInRange(0.01, pebble.KeyRange{
            Start: start,
            End:   objEnd,
        })
        end = g.prefix(end)
        if g.cmp(start, end) == 0 {
            end = objEnd
        }
        // Randomly set up a synthetic prefix.
        var syntheticPrefix sstable.SyntheticPrefix
        // We can only use a synthetic prefix if we don't have range dels.
        // TODO(radu): we will want to support this at some point.
        if !g.keyManager.objKeyMeta(id).hasRangeDels && g.rng.Intn(2) == 0 {
            syntheticPrefix = randBytes(g.rng, 1, 5)
            start = syntheticPrefix.Apply(start)
            end = syntheticPrefix.Apply(end)
        }

        objs[i] = externalObjWithBounds{
            externalObjID: id,
            bounds: pebble.KeyRange{
                Start: start,
                End:   end,
            },
            syntheticPrefix: syntheticPrefix,
        }
    }

    // Sort by start bound.
    slices.SortFunc(objs, func(a, b externalObjWithBounds) int {
        return g.cmp(a.bounds.Start, b.bounds.Start)
    })

    // Trim bounds so that there is no overlap.
    for i := 0; i < len(objs)-1; i++ {
        if g.cmp(objs[i].bounds.End, objs[i+1].bounds.Start) > 0 {
            objs[i].bounds.End = objs[i+1].bounds.Start
        }
    }
    // Some bounds might be empty now; remove those objects altogether. Note
    // that the last object is unmodified, so at least that object will
    // remain.
    objs = slices.DeleteFunc(objs, func(o externalObjWithBounds) bool {
        return g.cmp(o.bounds.Start, o.bounds.End) >= 0
    })

    // Randomly set synthetic suffixes.
    for i := range objs {
        if g.rng.Intn(2) == 0 {
            // We can only use a synthetic suffix if we don't have range dels.
            // TODO(radu): we will want to support this at some point.
            if g.keyManager.objKeyMeta(objs[i].externalObjID).hasRangeDels {
                continue
            }

            // We can only use a synthetic suffix if we don't have multiple
            // keys with the same prefix.
            hasDuplicatePrefix := func() bool {
                var prevPrefix []byte
                for _, k := range g.keyManager.KeysForExternalIngest(objs[i]) {
                    prefix := g.prefix(k.key)
                    if g.cmp(prefix, prevPrefix) == 0 {
                        return true
                    }
                    prevPrefix = append(prevPrefix[:0], prefix...)
                }
                return false
            }()
            if hasDuplicatePrefix {
                continue
            }

            // Generate a suffix that sorts before any previously generated
            // suffix.
            objs[i].syntheticSuffix = g.keyGenerator.IncMaxSuffix()
        }
    }

    // The objects we're ingesting may contain single delete tombstones that,
    // when applied to the db, result in nondeterminism for the deleted key.
    // If that's the case, we can restore determinism by first deleting the
    // keys from the db.
    //
    // Generating additional operations here is not ideal, but it simplifies
    // single delete invariants significantly.
    dbKeys := g.keyManager.objKeyMeta(dbID)
    for _, o := range objs {
        for _, src := range g.keyManager.KeysForExternalIngest(o) {
            if g.keyManager.checkForSingleDelConflict(src, dbKeys) {
                g.add(&deleteOp{
                    writerID:    dbID,
                    key:         src.key,
                    derivedDBID: dbID,
                })
            }
        }
    }

    // Shuffle the objects.
    g.rng.Shuffle(len(objs), func(i, j int) {
        objs[i], objs[j] = objs[j], objs[i]
    })

    g.add(&ingestExternalFilesOp{
        dbID: dbID,
        objs: objs,
    })
}

func (g *generator) writerLogData() {
    if len(g.liveWriters) == 0 {
        return
    }
    g.add(&logDataOp{
        writerID: g.liveWriters.rand(g.rng),
        data:     randBytes(g.rng, 0, g.expRandInt(10)),
    })
}

func (g *generator) writerMerge() {
    if len(g.liveWriters) == 0 {
        return
    }

    writerID := g.liveWriters.rand(g.rng)
    g.add(&mergeOp{
        writerID: writerID,
        // 20% new keys.
        key:   g.keyGenerator.RandKey(0.2),
        value: randBytes(g.rng, 0, maxValueSize),
    })
}

func (g *generator) writerSet() {
    if len(g.liveWriters) == 0 {
        return
    }

    writerID := g.liveWriters.rand(g.rng)
    g.add(&setOp{
        writerID: writerID,
        // 50% new keys.
        key:   g.keyGenerator.RandKey(0.5),
        value: randBytes(g.rng, 0, maxValueSize),
    })
}

func (g *generator) writerSingleDelete() {
    if len(g.liveWriters) == 0 {
        return
    }

    writerID := g.liveWriters.rand(g.rng)
    key := g.randKeyToSingleDelete(writerID)
    if key == nil {
        return
    }
    g.add(&singleDeleteOp{
        writerID: writerID,
        key:      key,
        // Keys eligible for single deletes can be removed with a regular
        // delete. Mutate a percentage of SINGLEDEL ops into DELETEs. Note
        // that here we are only determining whether the replacement *could*
        // happen. At test runtime, the `replaceSingleDelete` test option must
        // also be set to true for the single delete to be replaced.
        maybeReplaceDelete: g.rng.Float64() < 0.25,
    })
}

func (g *generator) maybeMutateOptions(readerID objID, opts *iterOpts) {
    // With 95% probability, allow changes to any options at all. This
    // ensures that in 5% of cases there are no changes, and SetOptions hits
    // its fast path.
    if g.rng.Intn(100) >= 5 {
        if !g.maybeSetSnapshotIterBounds(readerID, opts) {
            // With 1/3 probability, clear existing bounds.
            if opts.lower != nil && g.rng.Intn(3) == 0 {
                opts.lower = nil
            }
            if opts.upper != nil && g.rng.Intn(3) == 0 {
                opts.upper = nil
            }
            // With 1/3 probability, update the bounds.
            if g.rng.Intn(3) == 0 {
                // Generate a new key with a .1% probability.
                opts.lower = g.keyGenerator.RandKey(0.001)
            }
            if g.rng.Intn(3) == 0 {
                // Generate a new key with a .1% probability.
                opts.upper = g.keyGenerator.RandKey(0.001)
            }
            if g.cmp(opts.lower, opts.upper) > 0 {
                opts.lower, opts.upper = opts.upper, opts.lower
            }
        }

        // With 1/3 probability, update the key-types/mask.
        if g.rng.Intn(3) == 0 {
            opts.keyTypes, opts.maskSuffix = g.randKeyTypesAndMask()
        }

        // With 1/3 probability, clear the existing filter.
        if opts.filterMax > 0 && g.rng.Intn(3) == 0 {
            opts.filterMax, opts.filterMin = 0, 0
        }
        // With 10% probability, set a filter range.
        if g.rng.Intn(10) == 1 {
            opts.filterMin = uint64(g.keyGenerator.UniformSuffixInt() + 1)
            opts.filterMax = uint64(g.keyGenerator.UniformSuffixInt() + 1)
            if opts.filterMin > opts.filterMax {
                opts.filterMin, opts.filterMax = opts.filterMax, opts.filterMin
            } else if opts.filterMin == opts.filterMax {
                opts.filterMax = opts.filterMin + 1
            }
        }
        // With 10% probability, flip enablement of L6 filters.
        if g.rng.Float64() <= 0.1 {
            opts.useL6Filters = !opts.useL6Filters
        }
    }
}

func (g *generator) cmp(a, b []byte) int {
    return g.keyManager.comparer.Compare(a, b)
}

// prefix returns the prefix of the given key, per the comparer's Split. The
// three-index slice expression caps the result at the prefix length so that a
// subsequent append cannot clobber the key's suffix bytes.
func (g *generator) prefix(a []byte) []byte {
    n := g.keyManager.comparer.Split(a)
    return a[:n:n]
}

func (g *generator) String() string {
    var buf bytes.Buffer
    for _, op := range g.ops {
        fmt.Fprintf(&buf, "%s\n", op)
    }
    return buf.String()
}

// expRandInt returns a random non-negative integer using the exponential
// distribution with the given mean. This is useful when we usually want to
// test with small values, but we want to occasionally test with a larger
// value.
//
// Large integers are exponentially less likely than small integers;
// specifically, the probability decreases by a factor of `e` every `mean`
// values.
func (g *generator) expRandInt(mean int) int {
    return int(math.Round(g.rng.ExpFloat64() * float64(mean)))
}
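
// As a worked example, with mean == 3 roughly 63% of draws fall in [0, 3),
// about 1/e (37%) are >= 3, about 1/e^2 (14%) are >= 6, and so on (ignoring
// the small effect of rounding). A quick way to eyeball the distribution:
//
//	rng := rand.New(rand.NewSource(0))
//	hist := make(map[int]int)
//	for i := 0; i < 1000000; i++ {
//		hist[int(math.Round(rng.ExpFloat64()*3))]++
//	}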