Coverage Report

Created: 2025-10-26 07:13

/src/rocksdb/env/unique_id_gen.cc
 Line|  Count|Source
    1|       |//  Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
    2|       |//  This source code is licensed under both the GPLv2 (found in the
    3|       |//  COPYING file in the root directory) and Apache 2.0 License
    4|       |//  (found in the LICENSE.Apache file in the root directory).
    5|       |
    6|       |#include "env/unique_id_gen.h"
    7|       |
    8|       |#include <algorithm>
    9|       |#include <array>
   10|       |#include <atomic>
   11|       |#include <cstdint>
   12|       |#include <cstring>
   13|       |#include <random>
   14|       |
   15|       |#include "port/lang.h"
   16|       |#include "port/port.h"
   17|       |#include "rocksdb/env.h"
   18|       |#include "rocksdb/version.h"
   19|       |#include "util/hash.h"
   20|       |
   21|       |#ifdef __SSE4_2__
   22|       |#ifdef _WIN32
   23|       |#include <intrin.h>
   24|       |#define _rdtsc() __rdtsc()
   25|       |#else
   26|       |#include <x86intrin.h>
   27|       |#endif
   28|       |#else
   29|       |#include "rocksdb/system_clock.h"
   30|       |#endif
   31|       |
   32|       |namespace ROCKSDB_NAMESPACE {
   33|       |
   34|       |namespace {
   35|       |
   36|       |struct GenerateRawUniqueIdOpts {
   37|       |  Env* env = Env::Default();
   38|       |  bool exclude_port_uuid = false;
   39|       |  bool exclude_env_details = false;
   40|       |  bool exclude_random_device = false;
   41|       |};
   42|       |
   43|       |// Each of these "tracks" below should be sufficient for generating 128 bits
   44|       |// of entropy, after hashing the raw bytes. The tracks are separable for
   45|       |// testing purposes, but in production we combine as many tracks as possible
   46|       |// to ensure quality results even if some environments have degraded
   47|       |// capabilities or quality in some APIs.
   48|       |//
   49|       |// This approach has not been validated for use in cryptography. The goal is
   50|       |// generating globally unique values with high probability without coordination
   51|       |// between instances.
   52|       |//
   53|       |// Linux performance: EntropyTrackRandomDevice is much faster than
   54|       |// EntropyTrackEnvDetails, which is much faster than EntropyTrackPortUuid.
   55|       |
   56|       |struct EntropyTrackPortUuid {
   57|       |  std::array<char, 36> uuid;
   58|       |
   59|      4|  void Populate(const GenerateRawUniqueIdOpts& opts) {
   60|      4|    if (opts.exclude_port_uuid) {
   61|      0|      return;
   62|      0|    }
   63|      4|    std::string s;
   64|      4|    port::GenerateRfcUuid(&s);
   65|      4|    if (s.size() >= uuid.size()) {
   66|      4|      std::copy_n(s.begin(), uuid.size(), uuid.begin());
   67|      4|    }
   68|      4|  }
   69|       |};
   70|       |
   71|       |struct EntropyTrackEnvDetails {
   72|       |  std::array<char, 64> hostname_buf;
   73|       |  int64_t process_id;
   74|       |  uint64_t thread_id;
   75|       |  int64_t unix_time;
   76|       |  uint64_t nano_time;
   77|       |
   78|      4|  void Populate(const GenerateRawUniqueIdOpts& opts) {
   79|      4|    if (opts.exclude_env_details) {
   80|      0|      return;
   81|      0|    }
   82|      4|    opts.env->GetHostName(hostname_buf.data(), hostname_buf.size())
   83|      4|        .PermitUncheckedError();
   84|      4|    process_id = port::GetProcessID();
   85|      4|    thread_id = opts.env->GetThreadID();
   86|      4|    opts.env->GetCurrentTime(&unix_time).PermitUncheckedError();
   87|      4|    nano_time = opts.env->NowNanos();
   88|      4|  }
   89|       |};
   90|       |
   91|       |struct EntropyTrackRandomDevice {
   92|       |  using RandType = std::random_device::result_type;
   93|       |  static constexpr size_t kNumRandVals =
   94|       |      /* generous bits */ 192U / (8U * sizeof(RandType));
   95|       |  std::array<RandType, kNumRandVals> rand_vals;
   96|       |
   97|      4|  void Populate(const GenerateRawUniqueIdOpts& opts) {
   98|      4|    if (opts.exclude_random_device) {
   99|      0|      return;
  100|      0|    }
  101|      4|    std::random_device r;
  102|     24|    for (auto& val : rand_vals) {
  103|     24|      val = r();
  104|     24|    }
  105|      4|  }
  106|       |};
  107|       |
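
EntropyTrackRandomDevice sizes its buffer from std::random_device's result_type: kNumRandVals = 192 / (8 * sizeof(RandType)). On the common implementations where result_type is a 32-bit unsigned int this yields six values per call, which lines up with the loop-body count of 24 above (4 recorded Populate() calls x 6 iterations). A minimal standalone sketch of that arithmetic, not part of the measured source and assuming nothing beyond the standard library:

    #include <cstddef>
    #include <iostream>
    #include <random>

    int main() {
      using RandType = std::random_device::result_type;
      // Same sizing rule as the listing: enough values for ~192 bits.
      constexpr std::size_t kNumRandVals = 192U / (8U * sizeof(RandType));
      // Typically sizeof(RandType) == 4, so this prints 6.
      std::cout << "values per Populate() call: " << kNumRandVals << "\n";
      return 0;
    }
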
  108|       |struct Entropy {
  109|       |  uint64_t version_identifier;
  110|       |  EntropyTrackRandomDevice et1;
  111|       |  EntropyTrackEnvDetails et2;
  112|       |  EntropyTrackPortUuid et3;
  113|       |
  114|      4|  void Populate(const GenerateRawUniqueIdOpts& opts) {
  115|       |    // If we change the format of what goes into the entropy inputs, it's
  116|       |    // conceivable there could be a physical collision in the hash input
  117|       |    // even though they are logically different. This value should change
  118|       |    // if there's a change to the "schema" here, including byte order.
  119|      4|    version_identifier = (uint64_t{ROCKSDB_MAJOR} << 32) +
  120|      4|                         (uint64_t{ROCKSDB_MINOR} << 16) +
  121|      4|                         uint64_t{ROCKSDB_PATCH};
  122|      4|    et1.Populate(opts);
  123|      4|    et2.Populate(opts);
  124|      4|    et3.Populate(opts);
  125|      4|  }
  126|       |};
  127|       |
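
The version_identifier packs the RocksDB major, minor, and patch numbers into disjoint bit ranges of one 64-bit word, so any change to the release triple changes the hash-input schema. A standalone sketch of the same packing, using a hypothetical version 9.7.4 in place of the real ROCKSDB_MAJOR/MINOR/PATCH macros:

    #include <cstdint>
    #include <iostream>

    int main() {
      // Hypothetical stand-ins for ROCKSDB_MAJOR / ROCKSDB_MINOR / ROCKSDB_PATCH.
      constexpr uint64_t kMajor = 9, kMinor = 7, kPatch = 4;
      constexpr uint64_t version_identifier =
          (kMajor << 32) + (kMinor << 16) + kPatch;
      // Major occupies bits 32+, minor bits 16-31, patch bits 0-15.
      std::cout << std::hex << version_identifier << "\n";  // prints 900070004
      return 0;
    }
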
  128|       |void GenerateRawUniqueIdImpl(uint64_t* a, uint64_t* b,
  129|      4|                             const GenerateRawUniqueIdOpts& opts) {
  130|      4|  Entropy e;
  131|      4|  std::memset(&e, 0, sizeof(e));
  132|      4|  e.Populate(opts);
  133|      4|  Hash2x64(reinterpret_cast<const char*>(&e), sizeof(e), a, b);
  134|      4|}
  135|       |
  136|       |}  // namespace
  137|       |
  138|      4|void GenerateRawUniqueId(uint64_t* a, uint64_t* b, bool exclude_port_uuid) {
  139|      4|  GenerateRawUniqueIdOpts opts;
  140|      4|  opts.exclude_port_uuid = exclude_port_uuid;
  141|      4|  assert(!opts.exclude_env_details);
  142|      4|  assert(!opts.exclude_random_device);
  143|      4|  GenerateRawUniqueIdImpl(a, b, opts);
  144|      4|}
  145|       |
  146|       |#ifndef NDEBUG
  147|       |void TEST_GenerateRawUniqueId(uint64_t* a, uint64_t* b, bool exclude_port_uuid,
  148|       |                              bool exclude_env_details,
  149|       |                              bool exclude_random_device) {
  150|       |  GenerateRawUniqueIdOpts opts;
  151|       |  opts.exclude_port_uuid = exclude_port_uuid;
  152|       |  opts.exclude_env_details = exclude_env_details;
  153|       |  opts.exclude_random_device = exclude_random_device;
  154|       |  GenerateRawUniqueIdImpl(a, b, opts);
  155|       |}
  156|       |#endif
  157|       |
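
GenerateRawUniqueId is the entry point exercised by the recorded runs above (4 calls): it hashes all entropy tracks into a raw 128-bit value returned through two uint64_t words. A hedged usage sketch, assuming it is compiled inside the RocksDB source tree (env/unique_id_gen.h is an internal header, not part of the public include/ API):

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>

    #include "env/unique_id_gen.h"

    int main() {
      uint64_t upper = 0, lower = 0;
      // Keep all entropy tracks enabled, including the (slower) port UUID track.
      ROCKSDB_NAMESPACE::GenerateRawUniqueId(&upper, &lower,
                                             /*exclude_port_uuid=*/false);
      // Print the raw 128-bit unique id as 32 hex digits.
      std::printf("%016" PRIx64 "%016" PRIx64 "\n", upper, lower);
      return 0;
    }
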
  158|      4|void SemiStructuredUniqueIdGen::Reset() {
  159|      4|  saved_process_id_ = port::GetProcessID();
  160|      4|  GenerateRawUniqueId(&base_upper_, &base_lower_);
  161|      4|  counter_ = 0;
  162|      4|}
  163|       |
  164|  48.4k|void SemiStructuredUniqueIdGen::GenerateNext(uint64_t* upper, uint64_t* lower) {
  165|  48.4k|  if (port::GetProcessID() == saved_process_id_) {
  166|       |    // Safe to increment the atomic for guaranteed uniqueness within this
  167|       |    // process lifetime. Xor slightly better than +. See
  168|       |    // https://github.com/pdillinger/unique_id
  169|  48.4k|    *lower = base_lower_ ^ counter_.fetch_add(1);
  170|  48.4k|    *upper = base_upper_;
  171|  48.4k|  } else {
  172|       |    // There must have been a fork() or something. Rather than attempting to
  173|       |    // update in a thread-safe way, simply fall back on GenerateRawUniqueId.
  174|      0|    GenerateRawUniqueId(upper, lower);
  175|      0|  }
  176|  48.4k|}
  177|       |
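
The fast path above relies on the fact that XOR-ing a fixed random base with a strictly increasing counter is a bijection on uint64_t, so every counter value maps to a distinct lower word within a process lifetime (the linked unique_id write-up discusses why XOR behaves slightly better than addition here). A standalone illustration of that property, using an arbitrary stand-in for base_lower_:

    #include <cstdint>
    #include <iostream>
    #include <unordered_set>

    int main() {
      const uint64_t base_lower = 0x9e3779b97f4a7c15ULL;  // stand-in random base
      std::unordered_set<uint64_t> seen;
      for (uint64_t counter = 0; counter < 100000; ++counter) {
        // base_lower ^ counter is distinct for every distinct counter value.
        if (!seen.insert(base_lower ^ counter).second) {
          std::cout << "collision (should never happen)\n";
          return 1;
        }
      }
      std::cout << "100000 distinct ids generated\n";
      return 0;
    }
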
  178|      0|void UnpredictableUniqueIdGen::Reset() {
  179|      0|  for (size_t i = 0; i < pool_.size(); i += 2) {
  180|      0|    assert(i + 1 < pool_.size());
  181|      0|    uint64_t a, b;
  182|      0|    GenerateRawUniqueId(&a, &b);
  183|      0|    pool_[i] = a;
  184|      0|    pool_[i + 1] = b;
  185|      0|  }
  186|      0|}
  187|       |
  188|      0|void UnpredictableUniqueIdGen::GenerateNext(uint64_t* upper, uint64_t* lower) {
  189|      0|  uint64_t extra_entropy;
  190|       |  // Use timing information (if available) to add to entropy. (Not a disaster
  191|       |  // if unavailable on some platforms. High performance is important.)
  192|      0|#ifdef __SSE4_2__  // More than enough to guarantee rdtsc instruction
  193|      0|  extra_entropy = static_cast<uint64_t>(_rdtsc());
  194|       |#else
  195|       |  extra_entropy = SystemClock::Default()->NowNanos();
  196|       |#endif
  197|       |
  198|      0|  GenerateNextWithEntropy(upper, lower, extra_entropy);
  199|      0|}
  200|       |
  201|       |void UnpredictableUniqueIdGen::GenerateNextWithEntropy(uint64_t* upper,
  202|       |                                                       uint64_t* lower,
  203|      0|                                                       uint64_t extra_entropy) {
  204|       |  // To efficiently ensure unique inputs to the hash function in the presence
  205|       |  // of multithreading, we do not require atomicity on the whole entropy pool,
  206|       |  // but instead only a piece of it (a 64-bit counter) that is sufficient to
  207|       |  // guarantee uniqueness.
  208|      0|  uint64_t count = counter_.fetch_add(1, std::memory_order_relaxed);
  209|      0|  uint64_t a = count;
  210|      0|  uint64_t b = extra_entropy;
  211|       |  // Invoking the hash function several times avoids copying all the inputs
  212|       |  // to a contiguous, non-atomic buffer.
  213|      0|  BijectiveHash2x64(a, b, &a, &b);  // Based on XXH128
  214|       |
  215|       |  // In hashing the rest of the pool with that, we don't need to worry about
  216|       |  // races, but use atomic operations for sanitizer-friendliness.
  217|      0|  for (size_t i = 0; i < pool_.size(); i += 2) {
  218|      0|    assert(i + 1 < pool_.size());
  219|      0|    a ^= pool_[i].load(std::memory_order_relaxed);
  220|      0|    b ^= pool_[i + 1].load(std::memory_order_relaxed);
  221|      0|    BijectiveHash2x64(a, b, &a, &b);  // Based on XXH128
  222|      0|  }
  223|       |
  224|       |  // Return result
  225|      0|  *lower = a;
  226|      0|  *upper = b;
  227|       |
  228|       |  // Add some back into pool. We don't really care that there's a race in
  229|       |  // storing the result back and another thread computing the next value.
  230|       |  // It's just an entropy pool.
  231|      0|  pool_[count & (pool_.size() - 1)].fetch_add(a, std::memory_order_relaxed);
  232|      0|}
  233|       |
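
None of the UnpredictableUniqueIdGen paths were exercised in this run (all counts are 0). The structure it uses, an atomic counter to keep hash inputs unique across threads plus a lock-free pool folded in through repeated 128-bit mixing, can be sketched in standalone form. This is only an illustration of the shape of the algorithm: the Mix function below is a SplitMix64-style stand-in, not RocksDB's BijectiveHash2x64, and the pool size is an arbitrary power of two.

    #include <array>
    #include <atomic>
    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    namespace {

    // SplitMix64-style finalizer used as a stand-in for BijectiveHash2x64.
    uint64_t Mix(uint64_t x) {
      x += 0x9e3779b97f4a7c15ULL;
      x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL;
      x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL;
      return x ^ (x >> 31);
    }

    std::array<std::atomic<uint64_t>, 8> pool{};  // entropy pool, power-of-two size
    std::atomic<uint64_t> counter{0};

    void GenerateNextSketch(uint64_t* upper, uint64_t* lower, uint64_t extra) {
      // Only the counter needs atomic uniqueness; pool reads are allowed to race.
      uint64_t count = counter.fetch_add(1, std::memory_order_relaxed);
      uint64_t a = Mix(count);
      uint64_t b = Mix(extra ^ a);
      // Fold the pool into the running 128-bit state, two words at a time.
      for (std::size_t i = 0; i + 1 < pool.size(); i += 2) {
        a = Mix(a ^ pool[i].load(std::memory_order_relaxed));
        b = Mix(b ^ pool[i + 1].load(std::memory_order_relaxed));
      }
      *lower = a;
      *upper = b;
      // Feed some output back into the pool; racy updates are acceptable here.
      pool[count & (pool.size() - 1)].fetch_add(a, std::memory_order_relaxed);
    }

    }  // namespace

    int main() {
      uint64_t hi = 0, lo = 0;
      GenerateNextSketch(&hi, &lo, /*extra=*/12345);
      std::cout << std::hex << hi << lo << "\n";
      return 0;
    }
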
  234|       |#ifndef NDEBUG
  235|       |UnpredictableUniqueIdGen::UnpredictableUniqueIdGen(TEST_ZeroInitialized) {
  236|       |  for (auto& p : pool_) {
  237|       |    p.store(0);
  238|       |  }
  239|       |  counter_.store(0);
  240|       |}
  241|       |#endif
  242|       |
  243|       |}  // namespace ROCKSDB_NAMESPACE