/src/libdeflate/lib/x86/adler32_impl.h

Source (jump to first uncovered line)
/*
 * x86/adler32_impl.h - x86 implementations of Adler-32 checksum algorithm
 *
 * Copyright 2016 Eric Biggers
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef LIB_X86_ADLER32_IMPL_H
#define LIB_X86_ADLER32_IMPL_H

#include "cpu_features.h"

/*
 * SSE2 and AVX2 implementations.  Used on older CPUs.
 *
 * Each implementation is instantiated by defining the template parameters
 * (SUFFIX, ATTRIBUTES, VL, USE_VNNI, USE_AVX512) and then including
 * adler32_template.h.  The self-referential
 * "#define adler32_x86_xxx adler32_x86_xxx" makes the implementation's name
 * visible to the preprocessor, so that arch_select_adler32_func() can use
 * #ifdef to check whether it was compiled in.
 *
 * NOTE(review): VL appears to be the vector length in bytes (16 for SSE2, 32
 * for AVX2) -- confirm against adler32_template.h.
 * NOTE(review): the parameters are redefined for every instantiation without
 * an #undef in this file, so adler32_template.h presumably #undefs them at
 * its end -- confirm.
 */
#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
/* SSE2 instantiation: no VNNI, no AVX-512. */
#  define adler32_x86_sse2  adler32_x86_sse2
#  define SUFFIX         _sse2
#  define ATTRIBUTES    _target_attribute("sse2")
#  define VL      16
#  define USE_VNNI    0
#  define USE_AVX512    0
#  include "adler32_template.h"

/* AVX2 instantiation: no VNNI, no AVX-512. */
#  define adler32_x86_avx2  adler32_x86_avx2
#  define SUFFIX         _avx2
#  define ATTRIBUTES    _target_attribute("avx2")
#  define VL      32
#  define USE_VNNI    0
#  define USE_AVX512    0
#  include "adler32_template.h"
#endif

/*
 * AVX-VNNI implementation.  This is used on CPUs that have AVX2 and AVX-VNNI
 * but don't have AVX-512, for example Intel Alder Lake.
 *
 * Unusually for a new CPU feature, gcc added support for the AVX-VNNI
 * intrinsics (in gcc 11.1) slightly before binutils added support for
 * assembling AVX-VNNI instructions (in binutils 2.36).  Distros can reasonably
 * have gcc 11 with binutils 2.35.  Because of this issue, we check for gcc 12
 * (GCC_PREREQ(12, 1) below) instead of gcc 11.  (libdeflate supports direct
 * compilation without a configure step, so checking the binutils version is
 * not always an option.)
 *
 * Independently of the compiler version, this implementation is left out when
 * LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI is defined.
 */
#if (GCC_PREREQ(12, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)) && \
  !defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI)
/* Same vector length as the plain AVX2 instantiation, but with USE_VNNI set. */
#  define adler32_x86_avx2_vnni adler32_x86_avx2_vnni
#  define SUFFIX         _avx2_vnni
#  define ATTRIBUTES    _target_attribute("avx2,avxvnni")
#  define VL      32
#  define USE_VNNI    1
#  define USE_AVX512    0
#  include "adler32_template.h"
#endif

/*
 * AVX512VNNI implementations.  Both the 256-bit and the 512-bit variant share
 * the compiler-version gate below, and both are left out when
 * LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI is defined.
 */
#if (GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)) && \
  !defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI)
/*
 * AVX512VNNI implementation using 256-bit vectors.  This is very similar to the
 * AVX-VNNI implementation but takes advantage of masking and more registers.
 * This is used on certain older Intel CPUs, specifically Ice Lake and Tiger
 * Lake, which support AVX512VNNI but downclock a bit too eagerly when ZMM
 * registers are used.
 *
 * Unlike the 512-bit variant, the target attribute here also lists avx512vl.
 */
#  define adler32_x86_avx512_vl256_vnni adler32_x86_avx512_vl256_vnni
#  define SUFFIX           _avx512_vl256_vnni
#  define ATTRIBUTES    _target_attribute("avx512bw,avx512vl,avx512vnni")
#  define VL      32
#  define USE_VNNI    1
#  define USE_AVX512    1
#  include "adler32_template.h"

/*
 * AVX512VNNI implementation using 512-bit vectors.  This is used on CPUs that
 * have a good AVX-512 implementation including AVX512VNNI.
 *
 * arch_select_adler32_func() only picks this variant when the
 * X86_CPU_FEATURE_ZMM bit is set in the CPU feature mask.
 */
#  define adler32_x86_avx512_vl512_vnni adler32_x86_avx512_vl512_vnni
#  define SUFFIX           _avx512_vl512_vnni
#  define ATTRIBUTES    _target_attribute("avx512bw,avx512vnni")
#  define VL      64
#  define USE_VNNI    1
#  define USE_AVX512    1
#  include "adler32_template.h"
#endif

/*
 * Choose the x86 Adler-32 implementation to use, based on the feature mask
 * returned by get_x86_cpu_features().
 *
 * The candidates are tried in the order below.  The first one that was
 * compiled in (i.e. whose name is defined as a macro) and whose required CPU
 * features are all present is returned:
 *
 *   1. AVX512VNNI, 512-bit vectors (additionally needs X86_CPU_FEATURE_ZMM)
 *   2. AVX512VNNI, 256-bit vectors
 *   3. AVX2 + AVX-VNNI
 *   4. AVX2
 *   5. SSE2
 *
 * Returns NULL when no candidate qualifies.
 */
static inline adler32_func_t
arch_select_adler32_func(void)
{
  /* May end up unreferenced if every candidate below is compiled out. */
  const u32 cpu_flags MAYBE_UNUSED = get_x86_cpu_features();

#ifdef adler32_x86_avx512_vl512_vnni
  /* 512-bit vectors are only used when the ZMM feature bit is set. */
  if (cpu_flags & X86_CPU_FEATURE_ZMM) {
    if (HAVE_AVX512BW(cpu_flags) && HAVE_AVX512VNNI(cpu_flags))
      return adler32_x86_avx512_vl512_vnni;
  }
#endif

#ifdef adler32_x86_avx512_vl256_vnni
  if (HAVE_AVX512BW(cpu_flags) && HAVE_AVX512VL(cpu_flags)) {
    if (HAVE_AVX512VNNI(cpu_flags))
      return adler32_x86_avx512_vl256_vnni;
  }
#endif

#ifdef adler32_x86_avx2_vnni
  if (HAVE_AVX2(cpu_flags)) {
    if (HAVE_AVXVNNI(cpu_flags))
      return adler32_x86_avx2_vnni;
  }
#endif

#ifdef adler32_x86_avx2
  if (HAVE_AVX2(cpu_flags))
    return adler32_x86_avx2;
#endif

#ifdef adler32_x86_sse2
  if (HAVE_SSE2(cpu_flags))
    return adler32_x86_sse2;
#endif

  /* Nothing suitable was compiled in or is supported by this CPU. */
  return NULL;
}
#define arch_select_adler32_func  arch_select_adler32_func

#endif /* LIB_X86_ADLER32_IMPL_H */

Coverage Report

Created: 2025-06-16 07:00

Line	Count	Source (jump to first uncovered line)
1		/*
2		* x86/adler32_impl.h - x86 implementations of Adler-32 checksum algorithm
3		*
4		* Copyright 2016 Eric Biggers
5		*
6		* Permission is hereby granted, free of charge, to any person
7		* obtaining a copy of this software and associated documentation
8		* files (the "Software"), to deal in the Software without
9		* restriction, including without limitation the rights to use,
10		* copy, modify, merge, publish, distribute, sublicense, and/or sell
11		* copies of the Software, and to permit persons to whom the
12		* Software is furnished to do so, subject to the following
13		* conditions:
14		*
15		* The above copyright notice and this permission notice shall be
16		* included in all copies or substantial portions of the Software.
17		*
18		* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19		* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20		* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21		* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22		* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23		* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24		* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25		* OTHER DEALINGS IN THE SOFTWARE.
26		*/
27
28		#ifndef LIB_X86_ADLER32_IMPL_H
29		#define LIB_X86_ADLER32_IMPL_H
30
31		#include "cpu_features.h"
32
33		/* SSE2 and AVX2 implementations. Used on older CPUs. */
34		#if defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)
35	0	# define adler32_x86_sse2 adler32_x86_sse2
36		# define SUFFIX _sse2
37		# define ATTRIBUTES _target_attribute("sse2")
38	0	# define VL 16
39		# define USE_VNNI 0
40		# define USE_AVX512 0
41		# include "adler32_template.h"
42
43	6	# define adler32_x86_avx2 adler32_x86_avx2
44		# define SUFFIX _avx2
45		# define ATTRIBUTES _target_attribute("avx2")
46	870k	# define VL 32
47		# define USE_VNNI 0
48		# define USE_AVX512 0
49		# include "adler32_template.h"
50		#endif
51
52		/*
53		* AVX-VNNI implementation. This is used on CPUs that have AVX2 and AVX-VNNI
54		* but don't have AVX-512, for example Intel Alder Lake.
55		*
56		* Unusually for a new CPU feature, gcc added support for the AVX-VNNI
57		* intrinsics (in gcc 11.1) slightly before binutils added support for
58		* assembling AVX-VNNI instructions (in binutils 2.36). Distros can reasonably
59		* have gcc 11 with binutils 2.35. Because of this issue, we check for gcc 12
60		* instead of gcc 11. (libdeflate supports direct compilation without a
61		* configure step, so checking the binutils version is not always an option.)
62		*/
63		#if (GCC_PREREQ(12, 1) \|\| CLANG_PREREQ(12, 0, 13000000) \|\| MSVC_PREREQ(1930)) && \
64		!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI)
65	0	# define adler32_x86_avx2_vnni adler32_x86_avx2_vnni
66		# define SUFFIX _avx2_vnni
67		# define ATTRIBUTES _target_attribute("avx2,avxvnni")
68	0	# define VL 32
69		# define USE_VNNI 1
70		# define USE_AVX512 0
71		# include "adler32_template.h"
72		#endif
73
74		#if (GCC_PREREQ(8, 1) \|\| CLANG_PREREQ(6, 0, 10000000) \|\| MSVC_PREREQ(1920)) && \
75		!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI)
76		/*
77		* AVX512VNNI implementation using 256-bit vectors. This is very similar to the
78		* AVX-VNNI implementation but takes advantage of masking and more registers.
79		* This is used on certain older Intel CPUs, specifically Ice Lake and Tiger
80		* Lake, which support AVX512VNNI but downclock a bit too eagerly when ZMM
81		* registers are used.
82		*/
83	0	# define adler32_x86_avx512_vl256_vnni adler32_x86_avx512_vl256_vnni
84		# define SUFFIX _avx512_vl256_vnni
85		# define ATTRIBUTES _target_attribute("avx512bw,avx512vl,avx512vnni")
86	0	# define VL 32
87		# define USE_VNNI 1
88		# define USE_AVX512 1
89		# include "adler32_template.h"
90
91		/*
92		* AVX512VNNI implementation using 512-bit vectors. This is used on CPUs that
93		* have a good AVX-512 implementation including AVX512VNNI.
94		*/
95	0	# define adler32_x86_avx512_vl512_vnni adler32_x86_avx512_vl512_vnni
96		# define SUFFIX _avx512_vl512_vnni
97		# define ATTRIBUTES _target_attribute("avx512bw,avx512vnni")
98	0	# define VL 64
99		# define USE_VNNI 1
100		# define USE_AVX512 1
101		# include "adler32_template.h"
102		#endif
103
104		static inline adler32_func_t
105		arch_select_adler32_func(void)
106	6	{
107	6	const u32 features MAYBE_UNUSED = get_x86_cpu_features();
108
109	6	#ifdef adler32_x86_avx512_vl512_vnni
110	6	if ((features & X86_CPU_FEATURE_ZMM) &&
111	6	HAVE_AVX512BW(features) && HAVE_AVX512VNNI(features))
112	0	return adler32_x86_avx512_vl512_vnni;
113	6	#endif
114	6	#ifdef adler32_x86_avx512_vl256_vnni
115	6	if (HAVE_AVX512BW(features) && HAVE_AVX512VL(features) &&
116	6	HAVE_AVX512VNNI(features))
117	0	return adler32_x86_avx512_vl256_vnni;
118	6	#endif
119	6	#ifdef adler32_x86_avx2_vnni
120	6	if (HAVE_AVX2(features) && HAVE_AVXVNNI(features))
121	0	return adler32_x86_avx2_vnni;
122	6	#endif
123	6	#ifdef adler32_x86_avx2
124	6	if (HAVE_AVX2(features))
125	6	return adler32_x86_avx2;
126	0	#endif
127	0	#ifdef adler32_x86_sse2
128	0	if (HAVE_SSE2(features))
129	0	return adler32_x86_sse2;
130	0	#endif
131	0	return NULL;
132	0	}
133	6	#define arch_select_adler32_func arch_select_adler32_func
134
135		#endif /* LIB_X86_ADLER32_IMPL_H */