/src/abseil-cpp/absl/time/clock.cc
Line  | Count  | Source  | 
1  |  | // Copyright 2017 The Abseil Authors.  | 
2  |  | //  | 
3  |  | // Licensed under the Apache License, Version 2.0 (the "License");  | 
4  |  | // you may not use this file except in compliance with the License.  | 
5  |  | // You may obtain a copy of the License at  | 
6  |  | //  | 
7  |  | //      https://www.apache.org/licenses/LICENSE-2.0  | 
8  |  | //  | 
9  |  | // Unless required by applicable law or agreed to in writing, software  | 
10  |  | // distributed under the License is distributed on an "AS IS" BASIS,  | 
11  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
12  |  | // See the License for the specific language governing permissions and  | 
13  |  | // limitations under the License.  | 
14  |  |  | 
15  |  | #include "absl/time/clock.h"  | 
16  |  |  | 
17  |  | #include "absl/base/attributes.h"  | 
18  |  | #include "absl/base/optimization.h"  | 
19  |  |  | 
20  |  | #ifdef _WIN32  | 
21  |  | #include <windows.h>  | 
22  |  | #endif  | 
23  |  |  | 
24  |  | #include <algorithm>  | 
25  |  | #include <atomic>  | 
26  |  | #include <cerrno>  | 
27  |  | #include <cstdint>  | 
28  |  | #include <ctime>  | 
29  |  | #include <limits>  | 
30  |  |  | 
31  |  | #include "absl/base/internal/spinlock.h"  | 
32  |  | #include "absl/base/internal/unscaledcycleclock.h"  | 
33  |  | #include "absl/base/macros.h"  | 
34  |  | #include "absl/base/port.h"  | 
35  |  | #include "absl/base/thread_annotations.h"  | 
36  |  |  | 
37  |  | namespace absl { | 
38  |  | ABSL_NAMESPACE_BEGIN  | 
39  | 4.12M  | Time Now() { | 
40  |  |   // TODO(bww): Get a timespec instead so we don't have to divide.  | 
41  | 4.12M  |   int64_t n = absl::GetCurrentTimeNanos();  | 
42  | 4.12M  |   if (n >= 0) { | 
43  | 4.12M  |     return time_internal::FromUnixDuration(  | 
44  | 4.12M  |         time_internal::MakeDuration(n / 1000000000, n % 1000000000 * 4));  | 
45  | 4.12M  |   }  | 
46  | 0  |   return time_internal::FromUnixDuration(absl::Nanoseconds(n));  | 
47  | 4.12M  | }  | 
48  |  | ABSL_NAMESPACE_END  | 
49  |  | }  // namespace absl  | 
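
A minimal standalone sketch (not part of clock.cc) of the seconds / sub-second split performed in Now() above. The `* 4` converts the nanosecond remainder into Duration's internal quarter-nanosecond ticks; the 1/4 ns tick size is an assumption taken from absl/time/duration.cc rather than stated in this file, and the nanosecond count below is made up.

#include <cstdint>
#include <cstdio>

int main() {
  const int64_t n = 1700000000123456789;     // hypothetical Unix time in ns
  const int64_t secs = n / 1000000000;       // 1700000000 whole seconds
  const int64_t ticks = n % 1000000000 * 4;  // 123456789 ns -> 493827156 ticks
  std::printf("%lld s + %lld quarter-ns ticks\n",
              static_cast<long long>(secs), static_cast<long long>(ticks));
  return 0;
}
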
50  |  |  | 
51  |  | // Decide if we should use the fast GetCurrentTimeNanos() algorithm based on the  | 
52  |  | // cyclecounter, otherwise just get the time directly from the OS on every call.  | 
53  |  | // By default, the fast algorithm based on the cyclecount is disabled because in  | 
54  |  | // certain situations, for example, if the OS enters a "sleep" mode, it may  | 
55  |  | // produce incorrect values immediately upon waking.  | 
56  |  | // This can be chosen at compile-time via  | 
57  |  | // -DABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS=[0|1]  | 
58  |  | #ifndef ABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS  | 
59  |  | #define ABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS 0  | 
60  |  | #endif  | 
61  |  |  | 
62  |  | #if defined(__APPLE__) || defined(_WIN32)  | 
63  |  | #include "absl/time/internal/get_current_time_chrono.inc"  | 
64  |  | #else  | 
65  |  | #include "absl/time/internal/get_current_time_posix.inc"  | 
66  |  | #endif  | 
67  |  |  | 
68  |  | // Allows override by test.  | 
69  |  | #ifndef GET_CURRENT_TIME_NANOS_FROM_SYSTEM  | 
70  |  | #define GET_CURRENT_TIME_NANOS_FROM_SYSTEM() \  | 
71  | 4.12M  |   ::absl::time_internal::GetCurrentTimeNanosFromSystem()  | 
72  |  | #endif  | 
73  |  |  | 
74  |  | #if !ABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS  | 
75  |  | namespace absl { | 
76  |  | ABSL_NAMESPACE_BEGIN  | 
77  | 4.12M  | int64_t GetCurrentTimeNanos() { return GET_CURRENT_TIME_NANOS_FROM_SYSTEM(); } | 
78  |  | ABSL_NAMESPACE_END  | 
79  |  | }  // namespace absl  | 
80  |  | #else  // Use the cyclecounter-based implementation below.  | 
81  |  |  | 
82  |  | // Allows override by test.  | 
83  |  | #ifndef GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW  | 
84  |  | #define GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW() \  | 
85  |  |   ::absl::time_internal::UnscaledCycleClockWrapperForGetCurrentTime::Now()  | 
86  |  | #endif  | 
87  |  |  | 
88  |  | namespace absl { | 
89  |  | ABSL_NAMESPACE_BEGIN  | 
90  |  | namespace time_internal { | 
91  |  |  | 
92  |  | // On some processors, consecutive reads of the cycle counter may yield the  | 
93  |  | // same value (weakly-increasing). In debug mode, clear the least significant  | 
94  |  | // bits to discourage depending on a strictly-increasing Now() value.  | 
95  |  | // (The mask below is applied only in x86-64 debug builds; elsewhere the  | 
96  |  | // cycle counter value passes through unchanged.)  | 
97  |  | #if !defined(NDEBUG) && defined(__x86_64__)  | 
98  |  | constexpr int64_t kCycleClockNowMask = ~int64_t{0xff}; | 
99  |  | #else  | 
100  |  | constexpr int64_t kCycleClockNowMask = ~int64_t{0}; | 
101  |  | #endif  | 
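
A quick sanity check on the debug mask (illustrative arithmetic only): clearing the low 8 bits makes nearby raw readings compare equal, which is what discourages callers from relying on a strictly increasing value.

#include <cstdint>

static_assert((int64_t{0x12345678} & ~int64_t{0xff}) == int64_t{0x12345600},
              "low 8 bits are cleared");
static_assert((int64_t{0x12345699} & ~int64_t{0xff}) == int64_t{0x12345600},
              "nearby readings collapse to the same value");
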
102  |  |  | 
103  |  | // This is a friend wrapper around UnscaledCycleClock::Now()  | 
104  |  | // (needed to access UnscaledCycleClock).  | 
105  |  | class UnscaledCycleClockWrapperForGetCurrentTime { | 
106  |  |  public:  | 
107  |  |   static int64_t Now() { | 
108  |  |     return base_internal::UnscaledCycleClock::Now() & kCycleClockNowMask;  | 
109  |  |   }  | 
110  |  | };  | 
111  |  | }  // namespace time_internal  | 
112  |  |  | 
113  |  | // uint64_t is used in this module to provide an extra bit in multiplications  | 
114  |  |  | 
115  |  | // ---------------------------------------------------------------------  | 
116  |  | // An implementation of reader-writer locks that use no atomic ops in the read  | 
117  |  | // case.  This is a generalization of Lamport's method for reading a multiword  | 
118  |  | // clock.  Increment a word on each write acquisition, using the low-order bit  | 
119  |  | // as a spinlock; the word is the high word of the "clock".  Readers read the  | 
120  |  | // high word, then all other data, then the high word again, and repeat the  | 
121  |  | // read if the reads of the high words yield different answers, or an odd  | 
122  |  | // value (either case suggests possible interference from a writer).  | 
123  |  | // Here we use a spinlock to ensure only one writer at a time, rather than  | 
124  |  | // spinning on the bottom bit of the word to benefit from SpinLock  | 
125  |  | // spin-delay tuning.  | 
126  |  |  | 
127  |  | // Acquire seqlock (*seq) and return the value to be written to unlock.  | 
128  |  | static inline uint64_t SeqAcquire(std::atomic<uint64_t>* seq) { | 
129  |  |   uint64_t x = seq->fetch_add(1, std::memory_order_relaxed);  | 
130  |  |  | 
131  |  |   // We put a release fence between update to *seq and writes to shared data.  | 
132  |  |   // Thus all stores to shared data are effectively release operations and  | 
133  |  |   // update to *seq above cannot be re-ordered past any of them.  Note that  | 
134  |  |   // this barrier is not for the fetch_add above.  A release barrier for the  | 
135  |  |   // fetch_add would be before it, not after.  | 
136  |  |   std::atomic_thread_fence(std::memory_order_release);  | 
137  |  |  | 
138  |  |   return x + 2;  // original word plus 2  | 
139  |  | }  | 
140  |  |  | 
141  |  | // Release seqlock (*seq) by writing x to it---a value previously returned by  | 
142  |  | // SeqAcquire.  | 
143  |  | static inline void SeqRelease(std::atomic<uint64_t>* seq, uint64_t x) { | 
144  |  |   // The unlock store to *seq must have release ordering so that all  | 
145  |  |   // updates to shared data must finish before this store.  | 
146  |  |   seq->store(x, std::memory_order_release);  // release lock for readers  | 
147  |  | }  | 
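
The comment above describes the reader side of this seqlock only in prose; the real reader is open-coded in GetCurrentTimeNanos() below and falls back to a slow path rather than spinning. Purely as an illustrative sketch of the described protocol (not part of clock.cc, with names of my own choosing), a looping reader over a single payload word could look like this:

#include <atomic>
#include <cstdint>

struct SeqProtected {
  std::atomic<uint64_t> seq{0};      // even while no writer is active
  std::atomic<uint64_t> payload{0};  // stands in for the shared fields
};

// Retries whenever the sequence word changed across the read or was odd,
// i.e. whenever a writer may have interfered.
uint64_t SeqRead(const SeqProtected& p) {
  uint64_t value, seq_before, seq_after;
  do {
    seq_before = p.seq.load(std::memory_order_acquire);
    value = p.payload.load(std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_acquire);
    seq_after = p.seq.load(std::memory_order_relaxed);
  } while (seq_before != seq_after || (seq_before & 1) != 0);
  return value;
}
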
148  |  |  | 
149  |  | // ---------------------------------------------------------------------  | 
150  |  |  | 
151  |  | // "nsscaled" is a unit of time equal to a (2**kScale)th of a nanosecond.  | 
152  |  | enum { kScale = 30 }; | 
153  |  |  | 
154  |  | // The minimum interval between samples of the time base.  | 
155  |  | // We pick enough time to amortize the cost of the sample,  | 
156  |  | // to get a reasonably accurate cycle counter rate reading,  | 
157  |  | // and not so much that calculations will overflow 64-bits.  | 
158  |  | static const uint64_t kMinNSBetweenSamples = 2000 << 20;  | 
159  |  |  | 
160  |  | // We require that kMinNSBetweenSamples shifted by kScale  | 
161  |  | // have at least a bit left over for 64-bit calculations.  | 
162  |  | static_assert(((kMinNSBetweenSamples << (kScale + 1)) >> (kScale + 1)) ==  | 
163  |  |                   kMinNSBetweenSamples,  | 
164  |  |               "cannot represent kMaxBetweenSamplesNSScaled");  | 
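
For concreteness (illustrative only, not part of clock.cc): kMinNSBetweenSamples is 2000 << 20 == 2,097,152,000 ns, roughly two seconds, and one nanosecond equals 1 << kScale nsscaled units, so sub-nanosecond cycle periods remain representable as integers. For a hypothetical 3 GHz cycle counter:

#include <cstdint>

constexpr int kScaleSketch = 30;  // same value as kScale above
constexpr uint64_t kNsScaledPerNs = uint64_t{1} << kScaleSketch;
constexpr uint64_t kNsScaledPerCycleAt3GHz = kNsScaledPerNs / 3;  // 1/3 ns
static_assert(kNsScaledPerCycleAt3GHz == 357913941,
              "one third of a nanosecond, in nsscaled units");
static_assert((2000u << 20) == 2097152000u, "about 2.1 s between samples");
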
165  |  |  | 
166  |  | // data from a sample of the kernel's time value  | 
167  |  | struct TimeSampleAtomic { | 
168  |  |   std::atomic<uint64_t> raw_ns{0};              // raw kernel time | 
169  |  |   std::atomic<uint64_t> base_ns{0};             // our estimate of time | 
170  |  |   std::atomic<uint64_t> base_cycles{0};         // cycle counter reading | 
171  |  |   std::atomic<uint64_t> nsscaled_per_cycle{0};  // cycle period | 
172  |  |   // cycles before we'll sample again (a scaled reciprocal of the period,  | 
173  |  |   // to avoid a division on the fast path).  | 
174  |  |   std::atomic<uint64_t> min_cycles_per_sample{0}; | 
175  |  | };  | 
176  |  | // Same again, but with non-atomic types  | 
177  |  | struct TimeSample { | 
178  |  |   uint64_t raw_ns = 0;                 // raw kernel time  | 
179  |  |   uint64_t base_ns = 0;                // our estimate of time  | 
180  |  |   uint64_t base_cycles = 0;            // cycle counter reading  | 
181  |  |   uint64_t nsscaled_per_cycle = 0;     // cycle period  | 
182  |  |   uint64_t min_cycles_per_sample = 0;  // approx cycles before next sample  | 
183  |  | };  | 
184  |  |  | 
185  |  | struct ABSL_CACHELINE_ALIGNED TimeState { | 
186  |  |   std::atomic<uint64_t> seq{0}; | 
187  |  |   TimeSampleAtomic last_sample;  // the last sample; under seq  | 
188  |  |  | 
189  |  |   // The following counters are used only by the test code.  | 
190  |  |   int64_t stats_initializations{0}; | 
191  |  |   int64_t stats_reinitializations{0}; | 
192  |  |   int64_t stats_calibrations{0}; | 
193  |  |   int64_t stats_slow_paths{0}; | 
194  |  |   int64_t stats_fast_slow_paths{0}; | 
195  |  |  | 
196  |  |   uint64_t last_now_cycles ABSL_GUARDED_BY(lock){0}; | 
197  |  |  | 
198  |  |   // Used by GetCurrentTimeNanosFromKernel().  | 
199  |  |   // We try to read clock values at about the same time as the kernel clock.  | 
200  |  |   // This value gets adjusted up or down as estimate of how long that should  | 
201  |  |   // take, so we can reject attempts that take unusually long.  | 
202  |  |   std::atomic<uint64_t> approx_syscall_time_in_cycles{10 * 1000}; | 
203  |  |   // Number of times in a row we've seen a kernel time call take substantially  | 
204  |  |   // less than approx_syscall_time_in_cycles.  | 
205  |  |   std::atomic<uint32_t> kernel_time_seen_smaller{0}; | 
206  |  |  | 
207  |  |   // A reader-writer lock protecting the static locations below.  | 
208  |  |   // See SeqAcquire() and SeqRelease() above.  | 
209  |  |   absl::base_internal::SpinLock lock{base_internal::SCHEDULE_KERNEL_ONLY}; | 
210  |  | };  | 
211  |  | ABSL_CONST_INIT static TimeState time_state;  | 
212  |  |  | 
213  |  | // Return the time in ns as told by the kernel interface.  Place in *cycleclock  | 
214  |  | // the value of the cycleclock at about the time of the syscall.  | 
215  |  | // This call represents the time base that this module synchronizes to.  | 
216  |  | // Ensures that *cycleclock does not step back by up to (1 << 16) from  | 
217  |  | // last_cycleclock, to discard small backward counter steps.  (Larger steps are  | 
218  |  | // assumed to be complete resyncs, which shouldn't happen.  If they do, a full  | 
219  |  | // reinitialization of the outer algorithm should occur.)  | 
220  |  | static int64_t GetCurrentTimeNanosFromKernel(uint64_t last_cycleclock,  | 
221  |  |                                              uint64_t* cycleclock)  | 
222  |  |     ABSL_EXCLUSIVE_LOCKS_REQUIRED(time_state.lock) { | 
223  |  |   uint64_t local_approx_syscall_time_in_cycles =  // local copy  | 
224  |  |       time_state.approx_syscall_time_in_cycles.load(std::memory_order_relaxed);  | 
225  |  |  | 
226  |  |   int64_t current_time_nanos_from_system;  | 
227  |  |   uint64_t before_cycles;  | 
228  |  |   uint64_t after_cycles;  | 
229  |  |   uint64_t elapsed_cycles;  | 
230  |  |   int loops = 0;  | 
231  |  |   do { | 
232  |  |     before_cycles =  | 
233  |  |         static_cast<uint64_t>(GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW());  | 
234  |  |     current_time_nanos_from_system = GET_CURRENT_TIME_NANOS_FROM_SYSTEM();  | 
235  |  |     after_cycles =  | 
236  |  |         static_cast<uint64_t>(GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW());  | 
237  |  |     // elapsed_cycles is unsigned, so is large on overflow  | 
238  |  |     elapsed_cycles = after_cycles - before_cycles;  | 
239  |  |     if (elapsed_cycles >= local_approx_syscall_time_in_cycles &&  | 
240  |  |         ++loops == 20) {  // clock changed frequencies?  Back off. | 
241  |  |       loops = 0;  | 
242  |  |       if (local_approx_syscall_time_in_cycles < 1000 * 1000) { | 
243  |  |         local_approx_syscall_time_in_cycles =  | 
244  |  |             (local_approx_syscall_time_in_cycles + 1) << 1;  | 
245  |  |       }  | 
246  |  |       time_state.approx_syscall_time_in_cycles.store(  | 
247  |  |           local_approx_syscall_time_in_cycles, std::memory_order_relaxed);  | 
248  |  |     }  | 
249  |  |   } while (elapsed_cycles >= local_approx_syscall_time_in_cycles ||  | 
250  |  |            last_cycleclock - after_cycles < (static_cast<uint64_t>(1) << 16));  | 
251  |  |  | 
252  |  |   // Adjust approx_syscall_time_in_cycles to be within a factor of 2  | 
253  |  |   // of the typical time to execute one iteration of the loop above.  | 
254  |  |   if ((local_approx_syscall_time_in_cycles >> 1) < elapsed_cycles) { | 
255  |  |     // measured time is no smaller than half current approximation  | 
256  |  |     time_state.kernel_time_seen_smaller.store(0, std::memory_order_relaxed);  | 
257  |  |   } else if (time_state.kernel_time_seen_smaller.fetch_add(  | 
258  |  |                  1, std::memory_order_relaxed) >= 3) { | 
259  |  |     // smaller delays several times in a row; reduce approximation by 12.5%  | 
260  |  |     const uint64_t new_approximation =  | 
261  |  |         local_approx_syscall_time_in_cycles -  | 
262  |  |         (local_approx_syscall_time_in_cycles >> 3);  | 
263  |  |     time_state.approx_syscall_time_in_cycles.store(new_approximation,  | 
264  |  |                                                    std::memory_order_relaxed);  | 
265  |  |     time_state.kernel_time_seen_smaller.store(0, std::memory_order_relaxed);  | 
266  |  |   }  | 
267  |  |  | 
268  |  |   *cycleclock = after_cycles;  | 
269  |  |   return current_time_nanos_from_system;  | 
270  |  | }  | 
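
Numerically, the two adjustments above behave as follows, starting from the initial 10,000-cycle estimate (an illustrative sketch; the figures are just arithmetic on the expressions in the loop, not measurements):

#include <cstdint>

constexpr uint64_t kInitialSyscallCycles = 10 * 1000;
// Back-off after 20 consecutive over-budget iterations, while the estimate is
// still below one million cycles: (x + 1) << 1.
static_assert(((kInitialSyscallCycles + 1) << 1) == 20002,
              "estimate roughly doubles");
// Decay after four consecutive faster-than-half readings: x - (x >> 3).
static_assert(kInitialSyscallCycles - (kInitialSyscallCycles >> 3) == 8750,
              "estimate shrinks by 12.5%");
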
271  |  |  | 
272  |  | static int64_t GetCurrentTimeNanosSlowPath() ABSL_ATTRIBUTE_COLD;  | 
273  |  |  | 
274  |  | // Read the contents of *atomic into *sample.  | 
275  |  | // Each field is read atomically, but to maintain atomicity between fields,  | 
276  |  | // the access must be done under a lock.  | 
277  |  | static void ReadTimeSampleAtomic(const struct TimeSampleAtomic* atomic,  | 
278  |  |                                  struct TimeSample* sample) { | 
279  |  |   sample->base_ns = atomic->base_ns.load(std::memory_order_relaxed);  | 
280  |  |   sample->base_cycles = atomic->base_cycles.load(std::memory_order_relaxed);  | 
281  |  |   sample->nsscaled_per_cycle =  | 
282  |  |       atomic->nsscaled_per_cycle.load(std::memory_order_relaxed);  | 
283  |  |   sample->min_cycles_per_sample =  | 
284  |  |       atomic->min_cycles_per_sample.load(std::memory_order_relaxed);  | 
285  |  |   sample->raw_ns = atomic->raw_ns.load(std::memory_order_relaxed);  | 
286  |  | }  | 
287  |  |  | 
288  |  | // Public routine.  | 
289  |  | // Algorithm:  We wish to compute real time from a cycle counter.  In normal  | 
290  |  | // operation, we construct a piecewise linear approximation to the kernel time  | 
291  |  | // source, using the cycle counter value.  The start of each line segment is at  | 
292  |  | // the same point as the end of the last, but may have a different slope (that  | 
293  |  | // is, a different idea of the cycle counter frequency).  Every couple of  | 
294  |  | // seconds, the kernel time source is sampled and compared with the current  | 
295  |  | // approximation.  A new slope is chosen that, if followed for another couple  | 
296  |  | // of seconds, will correct the error at the current position.  The information  | 
297  |  | // for a sample is in the "last_sample" struct.  The linear approximation is  | 
298  |  | //   estimated_time = last_sample.base_ns +  | 
299  |  | //     last_sample.ns_per_cycle * (counter_reading - last_sample.base_cycles)  | 
300  |  | // (ns_per_cycle is actually stored in different units and scaled, to avoid  | 
301  |  | // overflow).  The base_ns of the next linear approximation is the  | 
302  |  | // estimated_time using the last approximation; the base_cycles is the cycle  | 
303  |  | // counter value at that time; the ns_per_cycle is the number of ns per cycle  | 
304  |  | // measured since the last sample, but adjusted so that most of the difference  | 
305  |  | // between the estimated_time and the kernel time will be corrected by the  | 
306  |  | // estimated time to the next sample.  In normal operation, this algorithm  | 
307  |  | // relies on:  | 
308  |  | // - the cycle counter and kernel time rates not changing a lot in a few  | 
309  |  | //   seconds.  | 
310  |  | // - the client calling into the code often compared to a couple of seconds, so  | 
311  |  | //   the time to the next correction can be estimated.  | 
312  |  | // Any time ns_per_cycle is not known, a major error is detected, or the  | 
313  |  | // assumption about frequent calls is violated, the implementation returns the  | 
314  |  | // kernel time.  It records sufficient data that a linear approximation can  | 
315  |  | // resume a little later.  | 
316  |  |  | 
317  |  | int64_t GetCurrentTimeNanos() { | 
318  |  |   // read the data from the "last_sample" struct (but don't need raw_ns yet)  | 
319  |  |   // The reads of "seq" and test of the values emulate a reader lock.  | 
320  |  |   uint64_t base_ns;  | 
321  |  |   uint64_t base_cycles;  | 
322  |  |   uint64_t nsscaled_per_cycle;  | 
323  |  |   uint64_t min_cycles_per_sample;  | 
324  |  |   uint64_t seq_read0;  | 
325  |  |   uint64_t seq_read1;  | 
326  |  |  | 
327  |  |   // If we have enough information to interpolate, the value returned will be  | 
328  |  |   // derived from this cycleclock-derived time estimate.  On some platforms  | 
329  |  |   // (POWER) the function to retrieve this value has enough complexity to  | 
330  |  |   // contribute to register pressure - reading it early before initializing  | 
331  |  |   // the other pieces of the calculation minimizes spill/restore instructions,  | 
332  |  |   // minimizing icache cost.  | 
333  |  |   uint64_t now_cycles =  | 
334  |  |       static_cast<uint64_t>(GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW());  | 
335  |  |  | 
336  |  |   // Acquire pairs with the barrier in SeqRelease - if this load sees that  | 
337  |  |   // store, the shared-data reads necessarily see that SeqRelease's updates  | 
338  |  |   // to the same shared data.  | 
339  |  |   seq_read0 = time_state.seq.load(std::memory_order_acquire);  | 
340  |  |  | 
341  |  |   base_ns = time_state.last_sample.base_ns.load(std::memory_order_relaxed);  | 
342  |  |   base_cycles =  | 
343  |  |       time_state.last_sample.base_cycles.load(std::memory_order_relaxed);  | 
344  |  |   nsscaled_per_cycle =  | 
345  |  |       time_state.last_sample.nsscaled_per_cycle.load(std::memory_order_relaxed);  | 
346  |  |   min_cycles_per_sample = time_state.last_sample.min_cycles_per_sample.load(  | 
347  |  |       std::memory_order_relaxed);  | 
348  |  |  | 
349  |  |   // This acquire fence pairs with the release fence in SeqAcquire.  Since it  | 
350  |  |   // is sequenced between reads of shared data and seq_read1, the reads of  | 
351  |  |   // shared data are effectively acquiring.  | 
352  |  |   std::atomic_thread_fence(std::memory_order_acquire);  | 
353  |  |  | 
354  |  |   // The shared-data reads are effectively acquire ordered, and the  | 
355  |  |   // shared-data writes are effectively release ordered. Therefore if our  | 
356  |  |   // shared-data reads see any of a particular update's shared-data writes,  | 
357  |  |   // seq_read1 is guaranteed to see that update's SeqAcquire.  | 
358  |  |   seq_read1 = time_state.seq.load(std::memory_order_relaxed);  | 
359  |  |  | 
360  |  |   // Fast path.  Return if min_cycles_per_sample has not yet elapsed since the  | 
361  |  |   // last sample, and we read a consistent sample.  The fast path activates  | 
362  |  |   // only when min_cycles_per_sample is non-zero, which happens when we get an  | 
363  |  |   // estimate for the cycle time.  The predicate will fail if now_cycles <  | 
364  |  |   // base_cycles, or if some other thread is in the slow path.  | 
365  |  |   //  | 
366  |  |   // Since we now read now_cycles before base_ns, it is possible for now_cycles  | 
367  |  |   // to be less than base_cycles (if we were interrupted between those loads and  | 
368  |  |   // last_sample was updated). This is harmless, because delta_cycles will wrap  | 
369  |  |   // and report a time much much bigger than min_cycles_per_sample. In that case  | 
370  |  |   // we will take the slow path.  | 
371  |  |   uint64_t delta_cycles;  | 
372  |  |   if (seq_read0 == seq_read1 && (seq_read0 & 1) == 0 &&  | 
373  |  |       (delta_cycles = now_cycles - base_cycles) < min_cycles_per_sample) { | 
374  |  |     return static_cast<int64_t>(  | 
375  |  |         base_ns + ((delta_cycles * nsscaled_per_cycle) >> kScale));  | 
376  |  |   }  | 
377  |  |   return GetCurrentTimeNanosSlowPath();  | 
378  |  | }  | 
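
A worked example of the fast-path expression above, using made-up numbers: a roughly 3 GHz counter gives nsscaled_per_cycle of (1 << kScale) / 3, and 6,000 elapsed cycles (about 2 microseconds) interpolate to base_ns + 1999 ns once the right shift truncates.

#include <cstdint>

constexpr uint64_t kBaseNsSketch = 1000000000;  // hypothetical base_ns
constexpr uint64_t kNsScaledPerCycleSketch = (uint64_t{1} << 30) / 3;
constexpr uint64_t kDeltaCyclesSketch = 6000;
constexpr uint64_t kEstimateSketch =
    kBaseNsSketch + ((kDeltaCyclesSketch * kNsScaledPerCycleSketch) >> 30);
static_assert(kEstimateSketch == kBaseNsSketch + 1999,
              "roughly 2 microseconds of interpolated time");
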
379  |  |  | 
380  |  | // Return (a << kScale)/b.  | 
381  |  | // Zero is returned if b==0.   Scaling is performed internally to  | 
382  |  | // preserve precision without overflow.  | 
383  |  | static uint64_t SafeDivideAndScale(uint64_t a, uint64_t b) { | 
384  |  |   // Find maximum safe_shift so that  | 
385  |  |   //  0 <= safe_shift <= kScale  and  (a << safe_shift) does not overflow.  | 
386  |  |   int safe_shift = kScale;  | 
387  |  |   while (((a << safe_shift) >> safe_shift) != a) { | 
388  |  |     safe_shift--;  | 
389  |  |   }  | 
390  |  |   uint64_t scaled_b = b >> (kScale - safe_shift);  | 
391  |  |   uint64_t quotient = 0;  | 
392  |  |   if (scaled_b != 0) { | 
393  |  |     quotient = (a << safe_shift) / scaled_b;  | 
394  |  |   }  | 
395  |  |   return quotient;  | 
396  |  | }  | 
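
A usage sketch for SafeDivideAndScale() (illustrative, not part of clock.cc; the function name and numbers are mine): one second of measured kernel time over the three billion cycles of a hypothetical 3 GHz counter yields the same 1/3 ns-per-cycle figure in nsscaled units.

// Would compile if placed in this translation unit after SafeDivideAndScale().
static uint64_t ExampleCyclePeriodNsScaled() {
  const uint64_t ns = 1000 * 1000 * 1000;       // 1 s between samples
  const uint64_t cycles = 3000u * 1000 * 1000;  // cycles in that second
  // Returns (ns << kScale) / cycles == 357913941 for these inputs.
  return SafeDivideAndScale(ns, cycles);
}
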
397  |  |  | 
398  |  | static uint64_t UpdateLastSample(  | 
399  |  |     uint64_t now_cycles, uint64_t now_ns, uint64_t delta_cycles,  | 
400  |  |     const struct TimeSample* sample) ABSL_ATTRIBUTE_COLD;  | 
401  |  |  | 
402  |  | // The slow path of GetCurrentTimeNanos().  This is taken while gathering  | 
403  |  | // initial samples, when enough time has elapsed since the last sample, and if  | 
404  |  | // any other thread is writing to last_sample.  | 
405  |  | //  | 
406  |  | // Manually mark this 'noinline' to minimize stack frame size of the fast  | 
407  |  | // path.  Without this, sometimes a compiler may inline this big block of code  | 
408  |  | // into the fast path.  That causes lots of register spills and reloads that  | 
409  |  | // are unnecessary unless the slow path is taken.  | 
410  |  | //  | 
411  |  | // TODO(absl-team): Remove this attribute when our compiler is smart enough  | 
412  |  | // to do the right thing.  | 
413  |  | ABSL_ATTRIBUTE_NOINLINE  | 
414  |  | static int64_t GetCurrentTimeNanosSlowPath()  | 
415  |  |     ABSL_LOCKS_EXCLUDED(time_state.lock) { | 
416  |  |   // Serialize access to slow-path.  Fast-path readers are not blocked yet, and  | 
417  |  |   // code below must not modify last_sample until the seqlock is acquired.  | 
418  |  |   base_internal::SpinLockHolder l(time_state.lock);  | 
419  |  |  | 
420  |  |   // Sample the kernel time base.  This is the definition of  | 
421  |  |   // "now" if we take the slow path.  | 
422  |  |   uint64_t now_cycles;  | 
423  |  |   uint64_t now_ns = static_cast<uint64_t>(  | 
424  |  |       GetCurrentTimeNanosFromKernel(time_state.last_now_cycles, &now_cycles));  | 
425  |  |   time_state.last_now_cycles = now_cycles;  | 
426  |  |  | 
427  |  |   uint64_t estimated_base_ns;  | 
428  |  |  | 
429  |  |   // ----------  | 
430  |  |   // Read the "last_sample" values again; this time holding the write lock.  | 
431  |  |   struct TimeSample sample;  | 
432  |  |   ReadTimeSampleAtomic(&time_state.last_sample, &sample);  | 
433  |  |  | 
434  |  |   // ----------  | 
435  |  |   // Try running the fast path again; another thread may have updated the  | 
436  |  |   // sample between our run of the fast path and the sample we just read.  | 
437  |  |   uint64_t delta_cycles = now_cycles - sample.base_cycles;  | 
438  |  |   if (delta_cycles < sample.min_cycles_per_sample) { | 
439  |  |     // Another thread updated the sample.  This path does not take the seqlock  | 
440  |  |     // so that blocked readers can make progress without blocking new readers.  | 
441  |  |     estimated_base_ns =  | 
442  |  |         sample.base_ns + ((delta_cycles * sample.nsscaled_per_cycle) >> kScale);  | 
443  |  |     time_state.stats_fast_slow_paths++;  | 
444  |  |   } else { | 
445  |  |     estimated_base_ns =  | 
446  |  |         UpdateLastSample(now_cycles, now_ns, delta_cycles, &sample);  | 
447  |  |   }  | 
448  |  |  | 
449  |  |   return static_cast<int64_t>(estimated_base_ns);  | 
450  |  | }  | 
451  |  |  | 
452  |  | // Main part of the algorithm.  Locks out readers, updates the approximation  | 
453  |  | // using the new sample from the kernel, and stores the result in last_sample  | 
454  |  | // for readers.  Returns the new estimated time.  | 
455  |  | static uint64_t UpdateLastSample(uint64_t now_cycles, uint64_t now_ns,  | 
456  |  |                                  uint64_t delta_cycles,  | 
457  |  |                                  const struct TimeSample* sample)  | 
458  |  |     ABSL_EXCLUSIVE_LOCKS_REQUIRED(time_state.lock) { | 
459  |  |   uint64_t estimated_base_ns = now_ns;  | 
460  |  |   uint64_t lock_value =  | 
461  |  |       SeqAcquire(&time_state.seq);  // acquire seqlock to block readers  | 
462  |  |  | 
463  |  |   // The 5s in the next if-statement limits the time for which we will trust  | 
464  |  |   // the cycle counter and our last sample to give a reasonable result.  | 
465  |  |   // Errors in the rate of the source clock can be multiplied by the ratio  | 
466  |  |   // between this limit and kMinNSBetweenSamples.  | 
467  |  |   if (sample->raw_ns == 0 ||  // no recent sample, or clock went backwards  | 
468  |  |       sample->raw_ns + static_cast<uint64_t>(5) * 1000 * 1000 * 1000 < now_ns ||  | 
469  |  |       now_ns < sample->raw_ns || now_cycles < sample->base_cycles) { | 
470  |  |     // record this sample, and forget any previously known slope.  | 
471  |  |     time_state.last_sample.raw_ns.store(now_ns, std::memory_order_relaxed);  | 
472  |  |     time_state.last_sample.base_ns.store(estimated_base_ns,  | 
473  |  |                                          std::memory_order_relaxed);  | 
474  |  |     time_state.last_sample.base_cycles.store(now_cycles,  | 
475  |  |                                              std::memory_order_relaxed);  | 
476  |  |     time_state.last_sample.nsscaled_per_cycle.store(0,  | 
477  |  |                                                     std::memory_order_relaxed);  | 
478  |  |     time_state.last_sample.min_cycles_per_sample.store(  | 
479  |  |         0, std::memory_order_relaxed);  | 
480  |  |     time_state.stats_initializations++;  | 
481  |  |   } else if (sample->raw_ns + 500 * 1000 * 1000 < now_ns &&  | 
482  |  |              sample->base_cycles + 50 < now_cycles) { | 
483  |  |     // Enough time has passed to compute the cycle time.  | 
484  |  |     if (sample->nsscaled_per_cycle != 0) {  // Have a cycle time estimate. | 
485  |  |       // Compute time from counter reading, but avoiding overflow  | 
486  |  |       // delta_cycles may be larger than on the fast path.  | 
487  |  |       uint64_t estimated_scaled_ns;  | 
488  |  |       int s = -1;  | 
489  |  |       do { | 
490  |  |         s++;  | 
491  |  |         estimated_scaled_ns = (delta_cycles >> s) * sample->nsscaled_per_cycle;  | 
492  |  |       } while (estimated_scaled_ns / sample->nsscaled_per_cycle !=  | 
493  |  |                (delta_cycles >> s));  | 
494  |  |       estimated_base_ns =  | 
495  |  |           sample->base_ns + (estimated_scaled_ns >> (kScale - s));  | 
496  |  |     }  | 
497  |  |  | 
498  |  |     // Compute the assumed cycle time kMinNSBetweenSamples ns into the future  | 
499  |  |     // assuming the cycle counter rate stays the same as the last interval.  | 
500  |  |     uint64_t ns = now_ns - sample->raw_ns;  | 
501  |  |     uint64_t measured_nsscaled_per_cycle = SafeDivideAndScale(ns, delta_cycles);  | 
502  |  |  | 
503  |  |     uint64_t assumed_next_sample_delta_cycles =  | 
504  |  |         SafeDivideAndScale(kMinNSBetweenSamples, measured_nsscaled_per_cycle);  | 
505  |  |  | 
506  |  |     // Estimate low by this much.  | 
507  |  |     int64_t diff_ns = static_cast<int64_t>(now_ns - estimated_base_ns);  | 
508  |  |  | 
509  |  |     // We want to set nsscaled_per_cycle so that our estimate of the ns time  | 
510  |  |     // at the assumed cycle time is the assumed ns time.  | 
511  |  |     // That is, we want to set nsscaled_per_cycle so:  | 
512  |  |     //  kMinNSBetweenSamples + diff_ns  ==  | 
513  |  |     //  (assumed_next_sample_delta_cycles * nsscaled_per_cycle) >> kScale  | 
514  |  |     // But we wish to damp oscillations, so instead correct only most  | 
515  |  |     // of our current error, by solving:  | 
516  |  |     //  kMinNSBetweenSamples + diff_ns - (diff_ns / 16) ==  | 
517  |  |     //  (assumed_next_sample_delta_cycles * nsscaled_per_cycle) >> kScale  | 
518  |  |     ns = static_cast<uint64_t>(static_cast<int64_t>(kMinNSBetweenSamples) +  | 
519  |  |                                diff_ns - (diff_ns / 16));  | 
520  |  |     uint64_t new_nsscaled_per_cycle =  | 
521  |  |         SafeDivideAndScale(ns, assumed_next_sample_delta_cycles);  | 
522  |  |     if (new_nsscaled_per_cycle != 0 && diff_ns < 100 * 1000 * 1000 &&  | 
523  |  |         -diff_ns < 100 * 1000 * 1000) { | 
524  |  |       // record the cycle time measurement  | 
525  |  |       time_state.last_sample.nsscaled_per_cycle.store(  | 
526  |  |           new_nsscaled_per_cycle, std::memory_order_relaxed);  | 
527  |  |       uint64_t new_min_cycles_per_sample =  | 
528  |  |           SafeDivideAndScale(kMinNSBetweenSamples, new_nsscaled_per_cycle);  | 
529  |  |       time_state.last_sample.min_cycles_per_sample.store(  | 
530  |  |           new_min_cycles_per_sample, std::memory_order_relaxed);  | 
531  |  |       time_state.stats_calibrations++;  | 
532  |  |     } else {  // something went wrong; forget the slope | 
533  |  |       time_state.last_sample.nsscaled_per_cycle.store(  | 
534  |  |           0, std::memory_order_relaxed);  | 
535  |  |       time_state.last_sample.min_cycles_per_sample.store(  | 
536  |  |           0, std::memory_order_relaxed);  | 
537  |  |       estimated_base_ns = now_ns;  | 
538  |  |       time_state.stats_reinitializations++;  | 
539  |  |     }  | 
540  |  |     time_state.last_sample.raw_ns.store(now_ns, std::memory_order_relaxed);  | 
541  |  |     time_state.last_sample.base_ns.store(estimated_base_ns,  | 
542  |  |                                          std::memory_order_relaxed);  | 
543  |  |     time_state.last_sample.base_cycles.store(now_cycles,  | 
544  |  |                                              std::memory_order_relaxed);  | 
545  |  |   } else { | 
546  |  |     // have a sample, but no slope; waiting for enough time for a calibration  | 
547  |  |     time_state.stats_slow_paths++;  | 
548  |  |   }  | 
549  |  |  | 
550  |  |   SeqRelease(&time_state.seq, lock_value);  // release the readers  | 
551  |  |  | 
552  |  |   return estimated_base_ns;  | 
553  |  | }  | 
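
To make the damping concrete (a numeric sketch only): if the current estimate trails the kernel by diff_ns == 1600 ns, the new slope is chosen to cover kMinNSBetweenSamples + 1600 - 100 ns over the assumed interval, so 15/16 of the error is absorbed by the time of the next sample.

#include <cstdint>

constexpr int64_t kDiffNsSketch = 1600;  // estimate low by 1.6 us
constexpr int64_t kCorrectionSketch = kDiffNsSketch - kDiffNsSketch / 16;
static_assert(kCorrectionSketch == 1500, "15/16 of the error is corrected");
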
554  |  | ABSL_NAMESPACE_END  | 
555  |  | }  // namespace absl  | 
556  |  | #endif  // ABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS  | 
557  |  |  | 
558  |  | namespace absl { | 
559  |  | ABSL_NAMESPACE_BEGIN  | 
560  |  | namespace { | 
561  |  |  | 
562  |  | // Returns the maximum duration that SleepOnce() can sleep for.  | 
563  | 0  | constexpr absl::Duration MaxSleep() { | 
564  |  | #ifdef _WIN32  | 
565  |  |   // Windows Sleep() takes unsigned long argument in milliseconds.  | 
566  |  |   return absl::Milliseconds(  | 
567  |  |       std::numeric_limits<unsigned long>::max());  // NOLINT(runtime/int)  | 
568  |  | #else  | 
569  | 0  |   return absl::Seconds(std::numeric_limits<time_t>::max());  | 
570  | 0  | #endif  | 
571  | 0  | }  | 
572  |  |  | 
573  |  | // Sleeps for the given duration.  | 
574  |  | // REQUIRES: to_sleep <= MaxSleep().  | 
575  | 0  | void SleepOnce(absl::Duration to_sleep) { | 
576  |  | #ifdef _WIN32  | 
577  |  |   Sleep(static_cast<DWORD>(to_sleep / absl::Milliseconds(1)));  | 
578  |  | #else  | 
579  | 0  |   struct timespec sleep_time = absl::ToTimespec(to_sleep);  | 
580  | 0  |   while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) { | 
581  |  |     // Ignore signals and wait for the full interval to elapse.  | 
582  | 0  |   }  | 
583  | 0  | #endif  | 
584  | 0  | }  | 
585  |  |  | 
586  |  | }  // namespace  | 
587  |  | ABSL_NAMESPACE_END  | 
588  |  | }  // namespace absl  | 
589  |  |  | 
590  |  | extern "C" { | 
591  |  |  | 
592  |  | ABSL_ATTRIBUTE_WEAK void ABSL_INTERNAL_C_SYMBOL(AbslInternalSleepFor)(  | 
593  | 0  |     absl::Duration duration) { | 
594  | 0  |   while (duration > absl::ZeroDuration()) { | 
595  | 0  |     absl::Duration to_sleep = std::min(duration, absl::MaxSleep());  | 
596  | 0  |     absl::SleepOnce(to_sleep);  | 
597  | 0  |     duration -= to_sleep;  | 
598  | 0  |   }  | 
599  | 0  | }  | 
600  |  |  | 
601  |  | }  // extern "C"  |
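
Callers normally reach the weak AbslInternalSleepFor() above through absl::SleepFor(), declared in absl/time/clock.h. A minimal usage sketch (the function name here is mine):

#include "absl/time/clock.h"
#include "absl/time/time.h"

void ExamplePause() {
  // Chunked into MaxSleep()-sized pieces; EINTR is retried inside SleepOnce().
  absl::SleepFor(absl::Milliseconds(50));
}
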