/src/LPM/external.protobuf/include/absl/base/prefetch.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // Copyright 2023 The Abseil Authors  | 
2  |  | //  | 
3  |  | // Licensed under the Apache License, Version 2.0 (the "License");  | 
4  |  | // you may not use this file except in compliance with the License.  | 
5  |  | // You may obtain a copy of the License at  | 
6  |  | //  | 
7  |  | //     https://www.apache.org/licenses/LICENSE-2.0  | 
8  |  | //  | 
9  |  | // Unless required by applicable law or agreed to in writing, software  | 
10  |  | // distributed under the License is distributed on an "AS IS" BASIS,  | 
11  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
12  |  | // See the License for the specific language governing permissions and  | 
13  |  | // limitations under the License.  | 
14  |  | //  | 
15  |  | // -----------------------------------------------------------------------------  | 
16  |  | // File: prefetch.h  | 
17  |  | // -----------------------------------------------------------------------------  | 
18  |  | //  | 
19  |  | // This header file defines prefetch functions to prefetch memory contents  | 
20  |  | // into the first level cache (L1) for the current CPU. The prefetch logic  | 
21  |  | // offered in this header is limited to prefetching first level cachelines  | 
22  |  | // only, and is aimed at relatively 'simple' prefetching logic.  | 
23  |  | //  | 
24  |  | #ifndef ABSL_BASE_PREFETCH_H_  | 
25  |  | #define ABSL_BASE_PREFETCH_H_  | 
26  |  |  | 
27  |  | #include "absl/base/attributes.h"  | 
28  |  | #include "absl/base/config.h"  | 
29  |  |  | 
30  |  | #if defined(ABSL_INTERNAL_HAVE_SSE)  | 
31  |  | #include <xmmintrin.h>  | 
32  |  | #endif  | 
33  |  |  | 
34  |  | #if defined(_MSC_VER)  | 
35  |  | #include <intrin.h>  | 
36  |  | #if defined(ABSL_INTERNAL_HAVE_SSE)  | 
37  |  | #pragma intrinsic(_mm_prefetch)  | 
38  |  | #endif  | 
39  |  | #endif  | 
40  |  |  | 
41  |  | namespace absl { | 
42  |  | ABSL_NAMESPACE_BEGIN  | 
43  |  |  | 
44  |  | // Moves data into the L1 cache before it is read, or "prefetches" it.  | 
45  |  | //  | 
46  |  | // The value of `addr` is the address of the memory to prefetch. If  | 
47  |  | // the target and compiler support it, data prefetch instructions are  | 
48  |  | // generated. If the prefetch is done some time before the memory is  | 
49  |  | // read, it may be in the cache by the time the read occurs.  | 
50  |  | //  | 
51  |  | // This method prefetches data with the highest degree of temporal locality;  | 
52  |  | // data is prefetched where possible into all levels of the cache.  | 
53  |  | //  | 
54  |  | // Incorrect or gratuitous use of this function can degrade performance.  | 
55  |  | // Use this function only when representative benchmarks show an improvement.  | 
56  |  | //  | 
57  |  | // Example:  | 
58  |  | //  | 
59  |  | //  // Computes incremental checksum for `data`.  | 
60  |  | //  int ComputeChecksum(int sum, absl::string_view data);  | 
61  |  | //  | 
62  |  | //  // Computes cumulative checksum for all values in `data`  | 
63  |  | //  int ComputeChecksum(absl::Span<const std::string> data) { | 
64  |  | //    int sum = 0;  | 
65  |  | //    auto it = data.begin();  | 
66  |  | //    auto pit = data.begin();  | 
67  |  | //    auto end = data.end();  | 
68  |  | //    for (int dist = 8; dist > 0 && pit != data.end(); --dist, ++pit) { | 
69  |  | //      absl::PrefetchToLocalCache(pit->data());  | 
70  |  | //    }  | 
71  |  | //    for (; pit != end; ++pit, ++it) { | 
72  |  | //      sum = ComputeChecksum(sum, *it);  | 
73  |  | //      absl::PrefetchToLocalCache(pit->data());  | 
74  |  | //    }  | 
75  |  | //    for (; it != end; ++it) { | 
76  |  | //      sum = ComputeChecksum(sum, *it);  | 
77  |  | //    }  | 
78  |  | //    return sum;  | 
79  |  | //  }  | 
80  |  | //  | 
81  |  | void PrefetchToLocalCache(const void* addr);  | 
82  |  |  | 
83  |  | // Moves data into the L1 cache before it is read, or "prefetches" it.  | 
84  |  | //  | 
85  |  | // This function is identical to `PrefetchToLocalCache()` except that it has  | 
86  |  | // non-temporal locality: the fetched data should not be left in any of the  | 
87  |  | // cache tiers. This is useful for cases where the data is used only once /  | 
88  |  | // short term, for example, invoking a destructor on an object.  | 
89  |  | //  | 
90  |  | // Incorrect or gratuitous use of this function can degrade performance.  | 
91  |  | // Use this function only when representative benchmarks show an improvement.  | 
92  |  | //  | 
93  |  | // Example:  | 
94  |  | //  | 
95  |  | //  template <typename Iterator>  | 
96  |  | //  void DestroyPointers(Iterator begin, Iterator end) { | 
97  |  | //    // Stay a few elements ahead of the element being deleted.  | 
98  |  | //  | 
99  |  | //    int dist = 8;  | 
100  |  | //    auto prefetch_it = begin;  | 
101  |  | //    while (prefetch_it != end && --dist) { | 
102  |  | //      absl::PrefetchToLocalCacheNta(*prefetch_it++);  | 
103  |  | //    }  | 
104  |  | //    while (prefetch_it != end) { | 
105  |  | //      delete *begin++;  | 
106  |  | //      absl::PrefetchToLocalCacheNta(*prefetch_it++);  | 
107  |  | //    }  | 
108  |  | //    while (begin != end) { | 
109  |  | //      delete *begin++;  | 
110  |  | //    }  | 
111  |  | //  }  | 
112  |  | //  | 
113  |  | void PrefetchToLocalCacheNta(const void* addr);  | 
114  |  |  | 
115  |  | // Moves data into the L1 cache with the intent to modify it.  | 
116  |  | //  | 
117  |  | // This function is similar to `PrefetchToLocalCache()` except that it  | 
118  |  | // prefetches cachelines with an 'intent to modify'. This typically includes  | 
119  |  | // invalidating cache entries for this address in all other cache tiers, and an  | 
120  |  | // exclusive access intent.  | 
121  |  | //  | 
122  |  | // Incorrect or gratuitous use of this function can degrade performance. As this  | 
123  |  | // function can invalidate cached cachelines on other caches and computer cores,  | 
124  |  | // incorrect usage of this function can have an even greater negative impact  | 
125  |  | // than incorrect regular prefetches.  | 
126  |  | // Use this function only when representative benchmarks show an improvement.  | 
127  |  | //  | 
128  |  | // Example:  | 
129  |  | //  | 
130  |  | //  void* Arena::Allocate(size_t size) { | 
131  |  | //    void* ptr = AllocateBlock(size);  | 
132  |  | //    absl::PrefetchToLocalCacheForWrite(ptr);  | 
133  |  | //    return ptr;  | 
134  |  | //  }  | 
135  |  | //  | 
136  |  | void PrefetchToLocalCacheForWrite(const void* addr);  | 
137  |  |  | 
138  |  | #if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)  | 
139  |  |  | 
140  |  | #define ABSL_HAVE_PREFETCH 1  | 
141  |  |  | 
142  |  | // See __builtin_prefetch:  | 
143  |  | // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.  | 
144  |  | //  | 
145  |  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(  | 
146  | 4.03M  |     const void* addr) { | 
147  | 4.03M  |   __builtin_prefetch(addr, 0, 3);  | 
148  | 4.03M  | }  | 
149  |  |  | 
150  |  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(  | 
151  | 0  |     const void* addr) { | 
152  | 0  |   __builtin_prefetch(addr, 0, 0);  | 
153  | 0  | }  | 
154  |  |  | 
155  |  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(  | 
156  | 0  |     const void* addr) { | 
157  | 0  |   // [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., 1)  | 
158  | 0  |   // unless -march=broadwell or newer; this is not generally the default, so  | 
159  | 0  |   // when __PRFCHW__ is not defined we emit prefetchw manually. PREFETCHW is a  | 
160  | 0  |   // no-op on older Intel processors and exists on AMD since the K6-2.  | 
161  | 0  | #if defined(__x86_64__) && !defined(__PRFCHW__)  | 
162  | 0  |   asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr))); | 
163  | 0  | #else  | 
164  | 0  |   __builtin_prefetch(addr, 1, 3);  | 
165  | 0  | #endif  | 
166  | 0  | }  | 
167  |  |  | 
168  |  | #elif defined(ABSL_INTERNAL_HAVE_SSE)  | 
169  |  |  | 
170  |  | #define ABSL_HAVE_PREFETCH 1  | 
171  |  |  | 
172  |  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(  | 
173  |  |     const void* addr) { | 
174  |  |   _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0);  | 
175  |  | }  | 
176  |  |  | 
177  |  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(  | 
178  |  |     const void* addr) { | 
179  |  |   _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA);  | 
180  |  | }  | 
181  |  |  | 
182  |  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(  | 
183  |  |     const void* addr) { | 
184  |  | #if defined(_MM_HINT_ET0)  | 
185  |  |   _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0);  | 
186  |  | #elif !defined(_MSC_VER) && defined(__x86_64__)  | 
187  |  |   // _MM_HINT_ET0 is not universally supported, so fall back to emitting  | 
188  |  |   // PREFETCHW directly. PREFETCHW is recognized as a no-op on older Intel  | 
189  |  |   // processors and has been present on AMD processors since the K6-2. This  | 
190  |  |   // path is disabled for MSVC because it miscompiles on older MSVC compilers.  | 
191  |  |   asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr))); | 
192  |  | #endif  | 
193  |  | }  | 
194  |  |  | 
195  |  | #else  | 
196  |  |  | 
197  |  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(  | 
198  |  |     const void* addr) {} | 
199  |  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(  | 
200  |  |     const void* addr) {} | 
201  |  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(  | 
202  |  |     const void* addr) {} | 
203  |  |  | 
204  |  | #endif  | 
205  |  |  | 
206  |  | ABSL_NAMESPACE_END  | 
207  |  | }  // namespace absl  | 
208  |  |  | 
209  |  | #endif  // ABSL_BASE_PREFETCH_H_  |