Coverage Report

Created: 2023-09-25 06:27

/src/abseil-cpp/absl/base/prefetch.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2023 The Abseil Authors
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
//
15
// -----------------------------------------------------------------------------
16
// File: prefetch.h
17
// -----------------------------------------------------------------------------
18
//
19
// This header file defines prefetch functions to prefetch memory contents
20
// into the first level cache (L1) for the current CPU. The prefetch logic
21
// offered in this header is limited to prefetching first level cachelines
22
// only, and is aimed at relatively 'simple' prefetching logic.
23
//
24
#ifndef ABSL_BASE_PREFETCH_H_
25
#define ABSL_BASE_PREFETCH_H_
26
27
#include "absl/base/attributes.h"
28
#include "absl/base/config.h"
29
30
#if defined(ABSL_INTERNAL_HAVE_SSE)
31
#include <xmmintrin.h>
32
#endif
33
34
#if defined(_MSC_VER) && _MSC_VER >= 1900 && \
35
    (defined(_M_X64) || defined(_M_IX86))
36
#include <intrin.h>
37
#pragma intrinsic(_mm_prefetch)
38
#endif
39
40
namespace absl {
41
ABSL_NAMESPACE_BEGIN
42
43
// Moves data into the L1 cache before it is read, or "prefetches" it.
44
//
45
// The value of `addr` is the address of the memory to prefetch. If
46
// the target and compiler support it, data prefetch instructions are
47
// generated. If the prefetch is done some time before the memory is
48
// read, it may be in the cache by the time the read occurs.
49
//
50
// This method prefetches data with the highest degree of temporal locality;
51
// data is prefetched where possible into all levels of the cache.
52
//
53
// Incorrect or gratuitous use of this function can degrade performance.
54
// Use this function only when representative benchmarks show an improvement.
55
//
56
// Example:
57
//
58
//  // Computes incremental checksum for `data`.
59
//  int ComputeChecksum(int sum, absl::string_view data);
60
//
61
//  // Computes cumulative checksum for all values in `data`
62
//  int ComputeChecksum(absl::Span<const std::string> data) {
63
//    int sum = 0;
64
//    auto it = data.begin();
65
//    auto pit = data.begin();
66
//    auto end = data.end();
67
//    for (int dist = 8; dist > 0 && pit != end; --dist, ++pit) {
68
//      absl::PrefetchToLocalCache(pit->data());
69
//    }
70
//    for (; pit != end; ++pit, ++it) {
71
//      sum = ComputeChecksum(sum, *it);
72
//      absl::PrefetchToLocalCache(pit->data());
73
//    }
74
//    for (; it != end; ++it) {
75
//      sum = ComputeChecksum(sum, *it);
76
//    }
77
//    return sum;
78
//  }
79
//
80
void PrefetchToLocalCache(const void* addr);
81
82
// Moves data into the L1 cache before it is read, or "prefetches" it.
83
//
84
// This function is identical to `PrefetchToLocalCache()` except that it has
85
// non-temporal locality: the fetched data should not be left in any of the
86
// cache tiers. This is useful for cases where the data is used only once /
87
// short term, for example, invoking a destructor on an object.
88
//
89
// Incorrect or gratuitous use of this function can degrade performance.
90
// Use this function only when representative benchmarks show an improvement.
91
//
92
// Example:
93
//
94
//  template <typename Iterator>
95
//  void DestroyPointers(Iterator begin, Iterator end) {
96
//    // Run the prefetch iterator a fixed distance ahead of the deletions.
97
//
98
//    int dist = 8;
99
//    auto prefetch_it = begin;
100
//    while (prefetch_it != end && --dist) {
101
//      absl::PrefetchToLocalCacheNta(*prefetch_it++);
102
//    }
103
//    while (prefetch_it != end) {
104
//      delete *begin++;
105
//      absl::PrefetchToLocalCacheNta(*prefetch_it++);
106
//    }
107
//    while (begin != end) {
108
//      delete *begin++;
109
//    }
110
//  }
111
//
112
void PrefetchToLocalCacheNta(const void* addr);
113
114
// Moves data into the L1 cache with the intent to modify it.
115
//
116
// This function is similar to `PrefetchToLocalCache()` except that it
117
// prefetches cachelines with an 'intent to modify'. This typically includes
118
// invalidating cache entries for this address in all other cache tiers, and an
119
// exclusive access intent.
120
//
121
// Incorrect or gratuitous use of this function can degrade performance. As this
122
// function can invalidate cached cachelines on other caches and computer cores,
123
// incorrect usage of this function can have an even greater negative impact
124
// than incorrect regular prefetches.
125
// Use this function only when representative benchmarks show an improvement.
126
//
127
// Example:
128
//
129
//  void* Arena::Allocate(size_t size) {
130
//    void* ptr = AllocateBlock(size);
131
//    absl::PrefetchToLocalCacheForWrite(ptr);
132
//    return ptr;
133
//  }
134
//
135
void PrefetchToLocalCacheForWrite(const void* addr);
136
137
#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
138
139
#define ABSL_HAVE_PREFETCH 1
140
141
// See __builtin_prefetch:
142
// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
143
//
144
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
145
56
    const void* addr) {
146
56
  __builtin_prefetch(addr, 0, 3);
147
56
}
148
149
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
150
0
    const void* addr) {
151
0
  __builtin_prefetch(addr, 0, 0);
152
0
}
153
154
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
155
0
    const void* addr) {
156
0
  // [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., 1)
157
0
  // unless -march=broadwell or newer; this is not generally the default, so we
158
0
  // manually emit prefetchw. PREFETCHW is recognized as a no-op on older Intel
159
0
  // processors and has been present on AMD processors since the K6-2.
160
0
#if defined(__x86_64__)
161
0
  asm("prefetchw (%0)" : : "r"(addr));
162
0
#else
163
0
  __builtin_prefetch(addr, 1, 3);
164
0
#endif
165
0
}
166
167
#elif defined(ABSL_INTERNAL_HAVE_SSE)
168
169
#define ABSL_HAVE_PREFETCH 1
170
171
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
172
    const void* addr) {
173
  _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0);
174
}
175
176
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
177
    const void* addr) {
178
  _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA);
179
}
180
181
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
182
    const void* addr) {
183
#if defined(_MM_HINT_ET0)
184
  _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0);
185
#elif !defined(_MSC_VER) && defined(__x86_64__)
186
  // _MM_HINT_ET0 is not universally supported. As we commented further
187
  // up, PREFETCHW is recognized as a no-op on older Intel processors
188
  // and has been present on AMD processors since the K6-2. We have this
189
  // disabled for MSVC compilers as this miscompiles on older MSVC compilers.
190
  asm("prefetchw (%0)" : : "r"(addr));
191
#endif
192
}
193
194
#else
195
196
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
197
    const void* addr) {}
198
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
199
    const void* addr) {}
200
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
201
    const void* addr) {}
202
203
#endif
204
205
ABSL_NAMESPACE_END
206
}  // namespace absl
207
208
#endif  // ABSL_BASE_PREFETCH_H_