LCOV - code coverage report
Current view: top level - pebble/internal/randvar - zipf.go (source / functions) Hit Total Coverage
Test: 2023-12-11 08:16Z f16e0f48 - meta test only.lcov Lines: 25 65 38.5 %
Date: 2023-12-11 08:16:49 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2017 The Cockroach Authors.
       2             : //
       3             : // Licensed under the Apache License, Version 2.0 (the "License");
       4             : // you may not use this file except in compliance with the License.
       5             : // You may obtain a copy of the License at
       6             : //
       7             : //     http://www.apache.org/licenses/LICENSE-2.0
       8             : //
       9             : // Unless required by applicable law or agreed to in writing, software
      10             : // distributed under the License is distributed on an "AS IS" BASIS,
      11             : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      12             : // implied. See the License for the specific language governing
      13             : // permissions and limitations under the License. See the AUTHORS file
      14             : // for names of contributors.
      15             : //
      16             : // ZipfGenerator implements the Incrementing Zipfian Random Number Generator from
      17             : // [1]: "Quickly Generating Billion-Record Synthetic Databases"
      18             : // by Gray, Sundaresan, Englert, Baclawski, and Weinberger, SIGMOD 1994.
      19             : 
      20             : package randvar
      21             : 
      22             : import (
      23             :         "math"
      24             :         "sync"
      25             : 
      26             :         "github.com/cockroachdb/errors"
      27             :         "golang.org/x/exp/rand"
      28             : )
      29             : 
      30             : const (
      31             :         // See https://github.com/brianfrankcooper/YCSB/blob/f886c1e7988f8f4965cb88a1fe2f6bad2c61b56d/core/src/main/java/com/yahoo/ycsb/generator/ScrambledZipfianGenerator.java#L33-L35
      32             :         defaultMax   = 10000000000
      33             :         defaultTheta = 0.99
      34             :         defaultZetaN = 26.46902820178302
      35             : )
      36             : 
      37             : // Zipf is a random number generator that generates random numbers from a Zipf
      38             : // distribution. Unlike rand.Zipf, this generator supports incrementing the max
      39             : // parameter without performing an expensive recomputation of the underlying
      40             : // hidden parameters, which is a pattern used in [1] for efficiently generating
      41             : // large volumes of Zipf-distributed records for synthetic data. Second,
      42             : // rand.Zipf only supports theta <= 1, we suppose all values of theta.
      43             : type Zipf struct {
      44             :         // Supplied constants.
      45             :         theta float64
      46             :         min   uint64
      47             :         // Internally computed constants.
      48             :         alpha, zeta2 float64
      49             :         halfPowTheta float64
      50             :         // Mutable state.
      51             :         mu struct {
      52             :                 sync.RWMutex
      53             :                 max   uint64
      54             :                 eta   float64
      55             :                 zetaN float64
      56             :         }
      57             : }
      58             : 
      59             : // NewDefaultZipf constructs a new Zipf generator with the default parameters.
      60           0 : func NewDefaultZipf() (*Zipf, error) {
      61           0 :         return NewZipf(1, defaultMax, defaultTheta)
      62           0 : }
      63             : 
      64             : // NewZipf constructs a new Zipf generator with the given parameters.  Returns
      65             : // an error if the parameters are outside the accepted range.
      66           1 : func NewZipf(min, max uint64, theta float64) (*Zipf, error) {
      67           1 :         if min > max {
      68           0 :                 return nil, errors.Errorf("min %d > max %d", errors.Safe(min), errors.Safe(max))
      69           0 :         }
      70           1 :         if theta < 0.0 || theta == 1.0 {
      71           0 :                 return nil, errors.New("0 < theta, and theta != 1")
      72           0 :         }
      73             : 
      74           1 :         z := &Zipf{
      75           1 :                 min:   min,
      76           1 :                 theta: theta,
      77           1 :         }
      78           1 :         z.mu.max = max
      79           1 : 
      80           1 :         // Compute hidden parameters.
      81           1 :         z.zeta2 = computeZetaFromScratch(2, theta)
      82           1 :         z.halfPowTheta = 1.0 + math.Pow(0.5, z.theta)
      83           1 :         z.mu.zetaN = computeZetaFromScratch(max+1-min, theta)
      84           1 :         z.alpha = 1.0 / (1.0 - theta)
      85           1 :         z.mu.eta = (1 - math.Pow(2.0/float64(z.mu.max+1-z.min), 1.0-theta)) / (1.0 - z.zeta2/z.mu.zetaN)
      86           1 :         return z, nil
      87             : }
      88             : 
      89             : // computeZetaIncrementally recomputes zeta(max, theta), assuming that sum =
      90             : // zeta(oldMax, theta). Returns zeta(max, theta), computed incrementally.
      91           1 : func computeZetaIncrementally(oldMax, max uint64, theta float64, sum float64) float64 {
      92           1 :         if max < oldMax {
      93           0 :                 panic("unable to decrement max!")
      94             :         }
      95           1 :         for i := oldMax + 1; i <= max; i++ {
      96           1 :                 sum += 1.0 / math.Pow(float64(i), theta)
      97           1 :         }
      98           1 :         return sum
      99             : }
     100             : 
     101             : // The function zeta computes the value
     102             : // zeta(n, theta) = (1/1)^theta + (1/2)^theta + (1/3)^theta + ... + (1/n)^theta
     103           1 : func computeZetaFromScratch(n uint64, theta float64) float64 {
     104           1 :         if n == defaultMax && theta == defaultTheta {
     105           0 :                 // Precomputed value, borrowed from ScrambledZipfianGenerator.java. This is
     106           0 :                 // quite slow to calculate from scratch due to the large n value.
     107           0 :                 return defaultZetaN
     108           0 :         }
     109           1 :         return computeZetaIncrementally(0, n, theta, 0.0)
     110             : }
     111             : 
     112             : // IncMax increments max and recomputes the internal values that depend on
     113             : // it. Returns an error if the recomputation failed.
     114           0 : func (z *Zipf) IncMax(delta int) {
     115           0 :         z.mu.Lock()
     116           0 :         oldMax := z.mu.max
     117           0 :         z.mu.max += uint64(delta)
     118           0 :         z.mu.zetaN = computeZetaIncrementally(oldMax+1-z.min, z.mu.max+1-z.min, z.theta, z.mu.zetaN)
     119           0 :         z.mu.eta = (1 - math.Pow(2.0/float64(z.mu.max+1-z.min), 1.0-z.theta)) / (1.0 - z.zeta2/z.mu.zetaN)
     120           0 :         z.mu.Unlock()
     121           0 : }
     122             : 
     123             : // Max returns the max.
     124           0 : func (z *Zipf) Max() uint64 {
     125           0 :         z.mu.Lock()
     126           0 :         defer z.mu.Unlock()
     127           0 :         return z.mu.max
     128           0 : }
     129             : 
     130             : // Uint64 draws a new value between min and max, with probabilities according
     131             : // to the Zipf distribution.
     132           0 : func (z *Zipf) Uint64(rng *rand.Rand) uint64 {
     133           0 :         u := rng.Float64()
     134           0 :         z.mu.RLock()
     135           0 :         uz := u * z.mu.zetaN
     136           0 :         var result uint64
     137           0 :         if uz < 1.0 {
     138           0 :                 result = z.min
     139           0 :         } else if uz < z.halfPowTheta {
     140           0 :                 result = z.min + 1
     141           0 :         } else {
     142           0 :                 spread := float64(z.mu.max + 1 - z.min)
     143           0 :                 result = z.min + uint64(spread*math.Pow(z.mu.eta*u-z.mu.eta+1.0, z.alpha))
     144           0 :         }
     145           0 :         z.mu.RUnlock()
     146           0 :         return result
     147             : }

Generated by: LCOV version 1.14