/src/libdeflate/lib/x86/decompress_impl.h
Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef LIB_X86_DECOMPRESS_IMPL_H |
2 | | #define LIB_X86_DECOMPRESS_IMPL_H |
3 | | |
4 | | #include "cpu_features.h" |
5 | | |
6 | | /* |
7 | | * BMI2-optimized decompression function. |
8 | | * |
9 | | * With gcc and clang we just compile the whole function with |
10 | | * __attribute__((target("bmi2"))), and the compiler uses bmi2 automatically. |
11 | | * |
12 | | * With MSVC, there is no target function attribute, but it's still possible to |
13 | | * use bmi2 intrinsics explicitly. Currently we mostly don't, but there's a |
14 | | * case in which we do (see below), so we at least take advantage of that. |
15 | | * However, MSVC from VS2017 (toolset v141) apparently miscompiles the _bzhi_*() |
16 | | * intrinsics. It seems to be fixed in VS2022. Hence, use MSVC_PREREQ(1930). |
17 | | */ |
18 | | #if defined(__GNUC__) || defined(__clang__) || MSVC_PREREQ(1930) |
19 | 6 | # define deflate_decompress_bmi2 deflate_decompress_bmi2 /* self-define so the defined()/#ifdef tests later in this header can tell that the BMI2 variant exists */ |
20 | | # define FUNCNAME deflate_decompress_bmi2 /* NOTE(review): presumably the name decompress_template.h gives to the function it defines -- confirm in the template */ |
21 | | # define ATTRIBUTES _target_attribute("bmi2") /* target attribute for the generated function, per the header comment above */ |
22 | | /* |
23 | | * Even with __attribute__((target("bmi2"))), gcc doesn't reliably use the |
24 | | * bzhi instruction for 'word & BITMASK(count)'. So use the bzhi intrinsic |
25 | | * explicitly. EXTRACT_VARBITS() is equivalent to 'word & BITMASK(count)'; |
26 | | * EXTRACT_VARBITS8() is equivalent to 'word & BITMASK((u8)count)'. |
27 | | * Nevertheless, their implementation using the bzhi intrinsic is identical, |
28 | | * as the bzhi instruction truncates the count to 8 bits implicitly. |
29 | | * These overrides are skipped for clang (see the #ifndef __clang__ below); the 64-bit intrinsic is used when ARCH_X86_64 is defined, the 32-bit one otherwise. */ |
30 | | # ifndef __clang__ /* only non-clang compilers get the explicit bzhi intrinsics */ |
31 | | # ifdef ARCH_X86_64 |
32 | | # define EXTRACT_VARBITS(word, count) _bzhi_u64((word), (count)) |
33 | | # define EXTRACT_VARBITS8(word, count) _bzhi_u64((word), (count)) |
34 | | # else /* !ARCH_X86_64 */ |
35 | | # define EXTRACT_VARBITS(word, count) _bzhi_u32((word), (count)) |
36 | | # define EXTRACT_VARBITS8(word, count) _bzhi_u32((word), (count)) |
37 | | # endif /* ARCH_X86_64 */ |
38 | | # endif /* !__clang__ */ |
39 | | # include "../decompress_template.h" /* NOTE(review): presumably expands into the definition of FUNCNAME using the macros set above -- confirm in the template */ |
40 | | #endif /* __GNUC__ || __clang__ || MSVC_PREREQ(1930) */ |
41 | | |
42 | | #if defined(deflate_decompress_bmi2) && HAVE_BMI2_NATIVE |
43 | | #define DEFAULT_IMPL deflate_decompress_bmi2 /* the BMI2 variant exists and HAVE_BMI2_NATIVE is nonzero: make it the default; no selector function is defined in this case */ |
44 | | #else /* otherwise provide a selector that checks the CPU features when it is called */ |
45 | | /* Returns deflate_decompress_bmi2 when that variant was compiled in and HAVE_BMI2(get_x86_cpu_features()) is true; returns NULL otherwise. */ static inline decompress_func_t |
46 | | arch_select_decompress_func(void) |
47 | 6 | { |
48 | 6 | #ifdef deflate_decompress_bmi2 /* the BMI2 variant only exists if the compiler check earlier in this header passed */ |
49 | 6 | if (HAVE_BMI2(get_x86_cpu_features())) |
50 | 6 | return deflate_decompress_bmi2; |
51 | 0 | #endif |
52 | 0 | return NULL; /* no BMI2 variant selected; NOTE(review): presumably the caller then falls back to another implementation -- confirm at the call site */ |
53 | 6 | } |
54 | 6 | #define arch_select_decompress_func arch_select_decompress_func /* NOTE(review): self-define, presumably so other code can detect this selector with #ifdef -- confirm */ |
55 | | #endif /* !(defined(deflate_decompress_bmi2) && HAVE_BMI2_NATIVE) */ |
56 | | |
57 | | #endif /* LIB_X86_DECOMPRESS_IMPL_H */ |