/src/zlib-ng/arch/x86/x86_features.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* x86_features.c - x86 feature check |
2 | | * |
3 | | * Copyright (C) 2013 Intel Corporation. All rights reserved. |
4 | | * Author: |
5 | | * Jim Kukunas |
6 | | * |
7 | | * For conditions of distribution and use, see copyright notice in zlib.h |
8 | | */ |
9 | | |
10 | | #include "zbuild.h" |
11 | | #include "x86_features.h" |
12 | | |
13 | | |
14 | | #if defined(HAVE_CPUID_MS) |
15 | | # include <intrin.h> |
16 | | #elif defined(HAVE_CPUID_GNU) |
17 | | // Newer versions of GCC and clang come with cpuid.h |
18 | | # include <cpuid.h> |
19 | | # ifdef X86_HAVE_XSAVE_INTRIN |
20 | | # if __GNUC__ == 8 |
21 | | # include <xsaveintrin.h> |
22 | | # else |
23 | | # include <immintrin.h> |
24 | | # endif |
25 | | # endif |
26 | | #endif |
27 | | |
28 | | #include <string.h> |
29 | | |
/* Query CPUID leaf `info` and report the four result registers through the
 * output pointers. On toolchains with no known CPUID intrinsic, all four
 * registers read as zero, which disables the faster SSE/AVX code paths. */
static inline void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
#if defined(HAVE_CPUID_MS)
    unsigned int regs[4];
    __cpuid((int *)regs, info);
    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
#elif defined(HAVE_CPUID_GNU)
    unsigned a = 0, b = 0, c = 0, d = 0;
    __cpuid(info, a, b, c, d);
    *eax = a;
    *ebx = b;
    *ecx = c;
    *edx = d;
#else
    /* When using this fallback, the faster SSE/AVX code is disabled */
    *eax = 0;
    *ebx = 0;
    *ecx = 0;
    *edx = 0;
#endif
}
47 | | |
/* Query CPUID leaf `info` with sub-leaf `subinfo` (ECX input) and report the
 * four result registers through the output pointers. On toolchains with no
 * known CPUID intrinsic, all four registers read as zero, which disables the
 * faster SSE/AVX code paths. */
static inline void cpuidex(int info, int subinfo, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
#if defined(HAVE_CPUID_MS)
    unsigned int regs[4];
    __cpuidex((int *)regs, info, subinfo);
    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
#elif defined(HAVE_CPUID_GNU)
    unsigned a = 0, b = 0, c = 0, d = 0;
    __cpuid_count(info, subinfo, a, b, c, d);
    *eax = a;
    *ebx = b;
    *ecx = c;
    *edx = d;
#else
    /* When using this fallback, the faster SSE/AVX code is disabled */
    *eax = 0;
    *ebx = 0;
    *ecx = 0;
    *edx = 0;
#endif
}
65 | | |
/* Read extended control register `xcr` (XGETBV). The raw byte sequence is
 * used on GCC/Clang so no -mxsave compile flag is required. Returns 0 when
 * neither an intrinsic nor inline asm is available, which disables some of
 * the faster code paths. */
static inline uint64_t xgetbv(unsigned int xcr) {
#if defined(_MSC_VER) || defined(X86_HAVE_XSAVE_INTRIN)
    return _xgetbv(xcr);
#elif defined(__GNUC__)
    uint32_t lo, hi;
    __asm__ ( ".byte 0x0f, 0x01, 0xd0" : "=a"(lo), "=d"(hi) : "c"(xcr));
    return ((uint64_t)hi << 32) | lo;
#else
    /* When using this fallback, some of the faster code is disabled */
    return 0;
#endif
}
78 | | |
79 | 1 | void Z_INTERNAL x86_check_features(struct x86_cpu_features *features) { |
80 | 1 | unsigned eax, ebx, ecx, edx; |
81 | 1 | unsigned maxbasic; |
82 | | |
83 | 1 | cpuid(0, &maxbasic, &ebx, &ecx, &edx); |
84 | 1 | cpuid(1 /*CPU_PROCINFO_AND_FEATUREBITS*/, &eax, &ebx, &ecx, &edx); |
85 | | |
86 | 1 | features->has_sse2 = edx & 0x4000000; |
87 | 1 | features->has_ssse3 = ecx & 0x200; |
88 | 1 | features->has_sse41 = ecx & 0x80000; |
89 | 1 | features->has_sse42 = ecx & 0x100000; |
90 | 1 | features->has_pclmulqdq = ecx & 0x2; |
91 | | |
92 | 1 | if (ecx & 0x08000000) { |
93 | 1 | uint64_t xfeature = xgetbv(0); |
94 | | |
95 | 1 | features->has_os_save_ymm = ((xfeature & 0x06) == 0x06); |
96 | 1 | features->has_os_save_zmm = ((xfeature & 0xe6) == 0xe6); |
97 | 1 | } |
98 | | |
99 | 1 | if (maxbasic >= 7) { |
100 | | // Reference: https://software.intel.com/sites/default/files/article/405250/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family.pdf |
101 | 1 | cpuidex(7, 0, &eax, &ebx, &ecx, &edx); |
102 | | |
103 | | // check BMI2 bit |
104 | 1 | features->has_bmi2 = ebx & 0x8; |
105 | | |
106 | | // check AVX2 bit if the OS supports saving YMM registers |
107 | 1 | if (features->has_os_save_ymm) { |
108 | 1 | features->has_avx2 = ebx & 0x20; |
109 | 1 | } |
110 | | |
111 | | // check AVX512 bits if the OS supports saving ZMM registers |
112 | 1 | if (features->has_os_save_zmm) { |
113 | 0 | features->has_avx512f = ebx & 0x00010000; |
114 | 0 | if (features->has_avx512f) { |
115 | | // According to the Intel Software Developer's Manual, AVX512F must be enabled too in order to enable |
116 | | // AVX512(DQ,BW,VL). |
117 | 0 | features->has_avx512dq = ebx & 0x00020000; |
118 | 0 | features->has_avx512bw = ebx & 0x40000000; |
119 | 0 | features->has_avx512vl = ebx & 0x80000000; |
120 | 0 | } |
121 | 0 | features->has_avx512_common = features->has_avx512f && features->has_avx512dq && features->has_avx512bw \ |
122 | 0 | && features->has_avx512vl && features->has_bmi2; |
123 | 0 | features->has_avx512vnni = ecx & 0x800; |
124 | 0 | features->has_vpclmulqdq = ecx & 0x400; |
125 | 0 | } |
126 | 1 | } |
127 | 1 | } |