/src/abseil-cpp/absl/base/prefetch.h
Line | Count | Source |
1 | | // Copyright 2023 The Abseil Authors |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | // |
15 | | // ----------------------------------------------------------------------------- |
16 | | // File: prefetch.h |
17 | | // ----------------------------------------------------------------------------- |
18 | | // |
19 | | // This header file defines prefetch functions to prefetch memory contents |
20 | | // into the first level cache (L1) for the current CPU. The prefetch logic |
21 | | // offered in this header is limited to prefetching first level cachelines |
22 | | // only, and is aimed at relatively 'simple' prefetching logic. |
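 | | // |
 | | // The typical usage pattern is to issue a prefetch a fixed distance ahead of |
 | | // the element currently being processed. A minimal sketch of that pattern is |
 | | // shown below; `Item`, `Process`, and the distance of 8 are illustrative |
 | | // placeholders, not part of this header: |
 | | // |
 | | //  void ProcessAll(absl::Span<const Item> items) { |
 | | //    for (size_t i = 0; i < items.size(); ++i) { |
 | | //      if (i + 8 < items.size()) { |
 | | //        absl::PrefetchToLocalCache(&items[i + 8]); |
 | | //      } |
 | | //      Process(items[i]); |
 | | //    } |
 | | //  } |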
23 | | // |
24 | | #ifndef ABSL_BASE_PREFETCH_H_ |
25 | | #define ABSL_BASE_PREFETCH_H_ |
26 | | |
27 | | #include "absl/base/attributes.h" |
28 | | #include "absl/base/config.h" |
29 | | |
30 | | #if defined(ABSL_INTERNAL_HAVE_SSE) |
31 | | #include <xmmintrin.h> |
32 | | #endif |
33 | | |
34 | | #if defined(_MSC_VER) && _MSC_VER >= 1900 && \ |
35 | | (defined(_M_X64) || defined(_M_IX86)) |
36 | | #include <intrin.h> |
37 | | #pragma intrinsic(_mm_prefetch) |
38 | | #endif |
39 | | |
40 | | namespace absl { |
41 | | ABSL_NAMESPACE_BEGIN |
42 | | |
43 | | // Moves data into the L1 cache before it is read, or "prefetches" it. |
44 | | // |
45 | | // The value of `addr` is the address of the memory to prefetch. If |
46 | | // the target and compiler support it, data prefetch instructions are |
47 | | // generated. If the prefetch is done some time before the memory is |
48 | | // read, it may be in the cache by the time the read occurs. |
49 | | // |
50 | | // This method prefetches data with the highest degree of temporal locality; |
51 | | // data is prefetched where possible into all levels of the cache. |
52 | | // |
53 | | // Incorrect or gratuitous use of this function can degrade performance. |
54 | | // Use this function only when representative benchmarks show an improvement. |
55 | | // |
56 | | // Example: |
57 | | // |
58 | | //  // Computes incremental checksum for `data`. |
59 | | //  int ComputeChecksum(int sum, absl::string_view data); |
60 | | // |
61 | | //  // Computes cumulative checksum for all values in `data`. |
62 | | //  int ComputeChecksum(absl::Span<const std::string> data) { |
63 | | //    int sum = 0; |
64 | | //    auto it = data.begin(); |
65 | | //    auto pit = data.begin(); |
66 | | //    auto end = data.end(); |
67 | | //    for (int dist = 8; dist > 0 && pit != end; --dist, ++pit) { |
68 | | //      absl::PrefetchToLocalCache(pit->data()); |
69 | | //    } |
70 | | //    for (; pit != end; ++pit, ++it) { |
71 | | //      sum = ComputeChecksum(sum, *it); |
72 | | //      absl::PrefetchToLocalCache(pit->data()); |
73 | | //    } |
74 | | //    for (; it != end; ++it) { |
75 | | //      sum = ComputeChecksum(sum, *it); |
76 | | //    } |
77 | | //    return sum; |
78 | | //  } |
79 | | // |
80 | | void PrefetchToLocalCache(const void* addr); |
81 | | |
82 | | // Moves data into the L1 cache before it is read, or "prefetches" it. |
83 | | // |
84 | | // This function is identical to `PrefetchToLocalCache()` except that it has |
85 | | // non-temporal locality: the fetched data should not be left in any of the |
86 | | // cache tiers. This is useful for cases where the data is used only once or |
87 | | // only briefly, for example, when invoking a destructor on an object. |
88 | | // |
89 | | // Incorrect or gratuitous use of this function can degrade performance. |
90 | | // Use this function only when representative benchmarks show an improvement. |
91 | | // |
92 | | // Example: |
93 | | // |
94 | | //  template <typename Iterator> |
95 | | //  void DestroyPointers(Iterator begin, Iterator end) { |
96 | | //    // Prefetch up to the first 8 pointees ahead of the destroy loop. |
97 | | //    int dist = 8; |
98 | | //    auto prefetch_it = begin; |
99 | | //    while (prefetch_it != end && dist-- > 0) { |
100 | | //      absl::PrefetchToLocalCacheNta(*prefetch_it++); |
101 | | //    } |
102 | | // |
103 | | //    while (prefetch_it != end) { |
104 | | //      delete *begin++; |
105 | | //      absl::PrefetchToLocalCacheNta(*prefetch_it++); |
106 | | //    } |
107 | | //    while (begin != end) { |
108 | | //      delete *begin++; |
109 | | //    } |
110 | | //  } |
111 | | // |
112 | | void PrefetchToLocalCacheNta(const void* addr); |
113 | | |
114 | | // Moves data into the L1 cache with the intent to modify it. |
115 | | // |
116 | | // This function is similar to `PrefetchToLocalCache()` except that it |
117 | | // prefetches cachelines with an 'intent to modify'. This typically includes |
118 | | // invalidating cache entries for this address in all other cache tiers, and an |
119 | | // exclusive access intent. |
120 | | // |
121 | | // Incorrect or gratuitous use of this function can degrade performance. Because |
122 | | // this function can invalidate cachelines held in the caches of other cores, |
123 | | // incorrect usage can have an even greater negative impact than incorrect |
124 | | // regular prefetches. |
125 | | // Use this function only when representative benchmarks show an improvement. |
126 | | // |
127 | | // Example: |
128 | | // |
129 | | //  void* Arena::Allocate(size_t size) { |
130 | | //    void* ptr = AllocateBlock(size); |
131 | | //    absl::PrefetchToLocalCacheForWrite(ptr); |
132 | | //    return ptr; |
133 | | //  } |
134 | | // |
135 | | void PrefetchToLocalCacheForWrite(const void* addr); |
136 | | |
137 | | #if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__) |
138 | | |
139 | | #define ABSL_HAVE_PREFETCH 1 |
140 | | |
141 | | // See __builtin_prefetch: |
142 | | // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html. |
143 | | // |
144 | | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache( |
145 | 56 | const void* addr) { |
146 | 56 | __builtin_prefetch(addr, /*rw=*/0, /*locality=*/3); |
147 | 56 | } |
148 | | |
149 | | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta( |
150 | 0 | const void* addr) { |
151 | 0 | __builtin_prefetch(addr, /*rw=*/0, /*locality=*/0); |
152 | 0 | } |
153 | | |
154 | | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite( |
155 | 0 | const void* addr) { |
156 | 0 | // [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., 1) |
157 | 0 | // unless -march=broadwell or newer; this is not generally the default, so we |
158 | 0 | // manually emit prefetchw. PREFETCHW is recognized as a no-op on older Intel |
159 | 0 | // processors and has been present on AMD processors since the K6-2. |
160 | 0 | #if defined(__x86_64__) |
161 | 0 | asm("prefetchw (%0)" : : "r"(addr)); |
162 | 0 | #else |
163 | 0 | __builtin_prefetch(addr, /*rw=*/1, /*locality=*/3); |
164 | 0 | #endif |
165 | 0 | } |
166 | | |
167 | | #elif defined(ABSL_INTERNAL_HAVE_SSE) |
168 | | |
169 | | #define ABSL_HAVE_PREFETCH 1 |
170 | | |
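 | | // Mapping of the _mm_prefetch hints used below: _MM_HINT_T0 prefetches into |
 | | // all levels of the cache hierarchy, _MM_HINT_NTA requests a non-temporal |
 | | // prefetch that minimizes cache pollution, and _MM_HINT_ET0 (not universally |
 | | // available) adds an 'intent to modify' to the prefetch. |
 | |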
171 | | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache( |
172 | | const void* addr) { |
173 | | _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0); |
174 | | } |
175 | | |
176 | | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta( |
177 | | const void* addr) { |
178 | | _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA); |
179 | | } |
180 | | |
181 | | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite( |
182 | | const void* addr) { |
183 | | #if defined(_MM_HINT_ET0) |
184 | | _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0); |
185 | | #elif !defined(_MSC_VER) && defined(__x86_64__) |
186 | | // _MM_HINT_ET0 is not universally supported. As we commented further |
187 | | // up, PREFETCHW is recognized as a no-op on older Intel processors |
188 | | // and has been present on AMD processors since the K6-2. We keep this |
189 | | // disabled for MSVC, as older MSVC compilers miscompile it. |
190 | | asm("prefetchw (%0)" : : "r"(addr)); |
191 | | #endif |
192 | | } |
193 | | |
194 | | #else |
195 | | |
196 | | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache( |
197 | | const void* addr) {} |
198 | | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta( |
199 | | const void* addr) {} |
200 | | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite( |
201 | | const void* addr) {} |
202 | | |
203 | | #endif |
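 | |
 | | // `ABSL_HAVE_PREFETCH` is defined above only when real prefetch instructions |
 | | // are emitted; in the fallback case the functions compile to empty inline |
 | | // bodies. Callers may use the macro to skip prefetch bookkeeping (such as the |
 | | // loop splitting shown in the examples above) entirely. A minimal sketch that |
 | | // guards the prefetch-ahead pattern from the top of this file behind the |
 | | // macro; `Item` and `Process` remain illustrative placeholders: |
 | | // |
 | | //  void ProcessAll(absl::Span<const Item> items) { |
 | | //    size_t i = 0; |
 | | //  #ifdef ABSL_HAVE_PREFETCH |
 | | //    constexpr size_t kPrefetchDistance = 8; |
 | | //    for (; i + kPrefetchDistance < items.size(); ++i) { |
 | | //      absl::PrefetchToLocalCache(&items[i + kPrefetchDistance]); |
 | | //      Process(items[i]); |
 | | //    } |
 | | //  #endif |
 | | //    for (; i < items.size(); ++i) Process(items[i]); |
 | | //  } |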
204 | | |
205 | | ABSL_NAMESPACE_END |
206 | | } // namespace absl |
207 | | |
208 | | #endif // ABSL_BASE_PREFETCH_H_ |