/src/zlib-ng/arch/x86/x86_features.c
Line | Count | Source
1 | | /* x86_features.c - x86 feature check
2 | | *
3 | | * Copyright (C) 2013 Intel Corporation. All rights reserved.
4 | | * Author:
5 | | * Jim Kukunas
6 | | *
7 | | * For conditions of distribution and use, see copyright notice in zlib.h
8 | | */
9 | |
10 | | #ifdef X86_FEATURES
11 | |
12 | | #include "zbuild.h"
13 | | #include "x86_features.h"
14 | |
15 | | #if defined(HAVE_CPUID_MS)
16 | | #  include <intrin.h>
17 | | #elif defined(HAVE_CPUID_GNU)
18 | | // Newer versions of GCC and clang come with cpuid.h
19 | | #  include <cpuid.h>
20 | | #  ifdef X86_HAVE_XSAVE_INTRIN
21 | | #    if __GNUC__ == 8
22 | | #      include <xsaveintrin.h>
23 | | #    else
24 | | #      include <immintrin.h>
25 | | #    endif
26 | | #  endif
27 | | #endif
28 | | |
29 | 2 | static inline void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) { |
30 | | #if defined(HAVE_CPUID_MS) |
31 | | unsigned int registers[4]; |
32 | | __cpuid((int *)registers, info); |
33 | | |
34 | | *eax = registers[0]; |
35 | | *ebx = registers[1]; |
36 | | *ecx = registers[2]; |
37 | | *edx = registers[3]; |
38 | | #elif defined(HAVE_CPUID_GNU) |
39 | | *eax = *ebx = *ecx = *edx = 0; |
40 | 2 | __cpuid(info, *eax, *ebx, *ecx, *edx); |
41 | | #else |
42 | | /* When using this fallback, the faster SSE/AVX code is disabled */ |
43 | | *eax = *ebx = *ecx = *edx = 0; |
44 | | #endif |
45 | 2 | } |
46 | | |
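A minimal standalone sketch of the protocol this wrapper hides, assuming a GCC/Clang toolchain whose <cpuid.h> provides __get_cpuid (the HAVE_CPUID_MS path would use the MSVC __cpuid intrinsic instead); the demo program below is hypothetical and not part of zlib-ng. Leaf 0 returns the highest supported basic leaf in EAX and the vendor string in EBX, EDX, ECX:

    /* Hypothetical demo, not part of x86_features.c */
    #include <stdio.h>
    #include <string.h>
    #include <cpuid.h>

    int main(void) {
        unsigned eax, ebx, ecx, edx;
        char vendor[13];

        if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
            return 1;                      /* CPUID not usable on this CPU */

        /* Leaf 0: EAX = highest basic leaf, EBX/EDX/ECX spell the vendor. */
        memcpy(vendor + 0, &ebx, 4);
        memcpy(vendor + 4, &edx, 4);
        memcpy(vendor + 8, &ecx, 4);
        vendor[12] = '\0';

        printf("vendor: %s, max basic leaf: %u\n", vendor, eax);
        return 0;
    }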
47 | 1 | static inline void cpuidex(int info, int subinfo, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) { |
48 | | #if defined(HAVE_CPUID_MS) |
49 | | unsigned int registers[4]; |
50 | | __cpuidex((int *)registers, info, subinfo); |
51 | | |
52 | | *eax = registers[0]; |
53 | | *ebx = registers[1]; |
54 | | *ecx = registers[2]; |
55 | | *edx = registers[3]; |
56 | | #elif defined(HAVE_CPUID_GNU) |
57 | | *eax = *ebx = *ecx = *edx = 0; |
58 | 1 | __cpuid_count(info, subinfo, *eax, *ebx, *ecx, *edx); |
59 | | #else |
60 | | /* When using this fallback, the faster SSE/AVX code is disabled */ |
61 | | *eax = *ebx = *ecx = *edx = 0; |
62 | | #endif |
63 | 1 | } |
64 | | |
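The subleaf-aware variant matters because leaf 7 spreads its feature flags across subleaves selected through ECX. A rough standalone sketch (hypothetical, not zlib-ng code), assuming a toolchain whose <cpuid.h> provides __get_cpuid_count, of querying leaf 7 subleaf 0 the way cpuidex() is used further down; the BMI2 test mirrors the 0x100 mask in x86_check_features():

    /* Hypothetical demo, not part of x86_features.c */
    #include <stdio.h>
    #include <cpuid.h>

    int main(void) {
        unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;

        /* Leaf 7, subleaf 0: structured extended feature flags. */
        if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
            return 1;                      /* leaf 7 not supported */

        printf("BMI2: %s\n", (ebx & 0x100) ? "yes" : "no");   /* EBX bit 8 */
        printf("max subleaf: %u\n", eax);
        return 0;
    }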
65 | 1 | static inline uint64_t xgetbv(unsigned int xcr) { |
66 | 1 | #if defined(_MSC_VER) || defined(X86_HAVE_XSAVE_INTRIN) |
67 | 1 | return _xgetbv(xcr); |
68 | | #elif defined(__GNUC__) |
69 | | uint32_t eax, edx; |
70 | | __asm__ ( ".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(xcr)); |
71 | | return (uint64_t)(edx) << 32 | eax; |
72 | | #else |
73 | | /* When using this fallback, some of the faster code is disabled */ |
74 | | return 0; |
75 | | #endif |
76 | 1 | } |
77 | | |
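xgetbv(0) reads XCR0, the extended control register that records which register state the operating system saves and restores. Purely as a readability aid (these macro names are not part of zlib-ng), the 0x06 and 0xe6 masks tested in x86_check_features() decompose into the following XCR0 state-component bits:

    /* Illustrative constants, not part of x86_features.c */
    #define XCR0_SSE_STATE     0x02u   /* bit 1: XMM registers                */
    #define XCR0_AVX_STATE     0x04u   /* bit 2: upper halves of YMM          */
    #define XCR0_OPMASK_STATE  0x20u   /* bit 5: AVX-512 mask registers k0-k7 */
    #define XCR0_ZMM_HI256     0x40u   /* bit 6: upper halves of ZMM0-ZMM15   */
    #define XCR0_HI16_ZMM      0x80u   /* bit 7: ZMM16-ZMM31                  */

    #define XCR0_YMM_MASK  (XCR0_SSE_STATE | XCR0_AVX_STATE)            /* 0x06 */
    #define XCR0_ZMM_MASK  (XCR0_YMM_MASK | XCR0_OPMASK_STATE | \
                            XCR0_ZMM_HI256 | XCR0_HI16_ZMM)             /* 0xe6 */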
78 | 1 | void Z_INTERNAL x86_check_features(struct x86_cpu_features *features) { |
79 | 1 | unsigned eax, ebx, ecx, edx; |
80 | 1 | unsigned maxbasic; |
81 | | |
82 | 1 | cpuid(0, &maxbasic, &ebx, &ecx, &edx); |
83 | 1 | cpuid(1 /*CPU_PROCINFO_AND_FEATUREBITS*/, &eax, &ebx, &ecx, &edx); |
84 | | |
85 | 1 | features->has_sse2 = edx & 0x4000000;      /* leaf 1, EDX bit 26 */
86 | 1 | features->has_ssse3 = ecx & 0x200;         /* leaf 1, ECX bit 9  */
87 | 1 | features->has_sse41 = ecx & 0x80000;       /* leaf 1, ECX bit 19 */
88 | 1 | features->has_sse42 = ecx & 0x100000;      /* leaf 1, ECX bit 20 */
89 | 1 | features->has_pclmulqdq = ecx & 0x2;       /* leaf 1, ECX bit 1  */
90 | |
91 | 1 | if (ecx & 0x08000000) {                    /* leaf 1, ECX bit 27: OSXSAVE */
92 | 1 | uint64_t xfeature = xgetbv(0);             /* XCR0: state enabled by the OS */
93 | |
94 | 1 | features->has_os_save_ymm = ((xfeature & 0x06) == 0x06);   /* XMM + YMM state */
95 | 1 | features->has_os_save_zmm = ((xfeature & 0xe6) == 0xe6);   /* plus opmask + ZMM state */
96 | 1 | } |
97 | | |
98 | 1 | if (maxbasic >= 7) { |
99 | | // Reference: https://software.intel.com/sites/default/files/article/405250/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family.pdf |
100 | 1 | cpuidex(7, 0, &eax, &ebx, &ecx, &edx); |
101 | | |
102 | | // check BMI2 bit |
103 | 1 | features->has_bmi2 = ebx & 0x100; |
104 | | |
105 | | // check AVX2 bit if the OS supports saving YMM registers |
106 | 1 | if (features->has_os_save_ymm) { |
107 | 1 | features->has_avx2 = ebx & 0x20; |
108 | 1 | } |
109 | | |
110 | | // check AVX512 bits if the OS supports saving ZMM registers |
111 | 1 | if (features->has_os_save_zmm) { |
112 | 0 | features->has_avx512f = ebx & 0x00010000;    /* EBX bit 16 */
113 | 0 | if (features->has_avx512f) {
114 | | // According to the Intel Software Developer's Manual, AVX512F must also be supported and enabled
115 | | // before AVX512(DQ,BW,VL) can be used.
116 | 0 | features->has_avx512dq = ebx & 0x00020000;   /* EBX bit 17 */
117 | 0 | features->has_avx512bw = ebx & 0x40000000;   /* EBX bit 30 */
118 | 0 | features->has_avx512vl = ebx & 0x80000000;   /* EBX bit 31 */
119 | 0 | }
120 | 0 | features->has_avx512_common = features->has_avx512f && features->has_avx512dq && features->has_avx512bw \
121 | 0 | && features->has_avx512vl && features->has_bmi2;
122 | 0 | features->has_avx512vnni = ecx & 0x800;       /* ECX bit 11 */
123 | 0 | features->has_vpclmulqdq = ecx & 0x400;       /* ECX bit 10 */
124 | 0 | } |
125 | 1 | } |
126 | 1 | } |
127 | | |
128 | | #endif |
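For completeness, a standalone sketch of the same AVX2 gate x86_check_features() applies: OSXSAVE in leaf 1, XMM/YMM state enabled in XCR0, then the AVX2 bit in leaf 7. It assumes a GCC/Clang toolchain; the helper read_xcr0() and the demo program are hypothetical and simply reuse the raw XGETBV byte sequence from xgetbv() above rather than zlib-ng's internal headers.

    /* Hypothetical demo, not part of x86_features.c */
    #include <stdio.h>
    #include <stdint.h>
    #include <cpuid.h>

    static uint64_t read_xcr0(void) {
        uint32_t eax, edx;
        /* Same encoding as xgetbv() above: XGETBV with ECX = 0 selects XCR0. */
        __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
        return (uint64_t)edx << 32 | eax;
    }

    int main(void) {
        unsigned eax, ebx, ecx, edx;
        int usable_avx2 = 0;

        if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) &&
            (ecx & 0x08000000) &&                        /* OSXSAVE: XGETBV is usable  */
            (read_xcr0() & 0x06) == 0x06 &&              /* OS saves XMM and YMM state */
            __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) &&
            (ebx & 0x20))                                /* leaf 7, EBX bit 5: AVX2    */
            usable_avx2 = 1;

        printf("AVX2 usable: %s\n", usable_avx2 ? "yes" : "no");
        return 0;
    }

Checking OSXSAVE before executing XGETBV matters because the instruction raises #UD on CPUs or operating systems where XSAVE is not enabled, which is why the original code guards the xgetbv(0) call the same way.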