Coverage Report

Created: 2026-05-16 06:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/hashbrown-0.14.5/src/raw/sse2.rs
Line
Count
Source
1
use super::bitmask::BitMask;
2
use super::EMPTY;
3
use core::mem;
4
use core::num::NonZeroU16;
5
6
#[cfg(target_arch = "x86")]
7
use core::arch::x86;
8
#[cfg(target_arch = "x86_64")]
9
use core::arch::x86_64 as x86;
10
11
pub(crate) type BitMaskWord = u16;
12
pub(crate) type NonZeroBitMaskWord = NonZeroU16;
13
pub(crate) const BITMASK_STRIDE: usize = 1;
14
pub(crate) const BITMASK_MASK: BitMaskWord = 0xffff;
15
pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0;
16
17
/// Abstraction over a group of control bytes which can be scanned in
18
/// parallel.
19
///
20
/// This implementation uses a 128-bit SSE value.
21
#[derive(Copy, Clone)]
22
pub(crate) struct Group(x86::__m128i);
23
24
// FIXME: https://github.com/rust-lang/rust-clippy/issues/3859
25
#[allow(clippy::use_self)]
26
impl Group {
27
    /// Number of bytes in the group.
28
    pub(crate) const WIDTH: usize = mem::size_of::<Self>();
29
30
    /// Returns a full group of empty bytes, suitable for use as the initial
31
    /// value for an empty hash table.
32
    ///
33
    /// This is guaranteed to be aligned to the group size.
34
    #[inline]
35
    #[allow(clippy::items_after_statements)]
36
0
    pub(crate) const fn static_empty() -> &'static [u8; Group::WIDTH] {
37
        #[repr(C)]
38
        struct AlignedBytes {
39
            _align: [Group; 0],
40
            bytes: [u8; Group::WIDTH],
41
        }
42
        const ALIGNED_BYTES: AlignedBytes = AlignedBytes {
43
            _align: [],
44
            bytes: [EMPTY; Group::WIDTH],
45
        };
46
0
        &ALIGNED_BYTES.bytes
47
0
    }
48
49
    /// Loads a group of bytes starting at the given address.
50
    #[inline]
51
    #[allow(clippy::cast_ptr_alignment)] // unaligned load
52
0
    pub(crate) unsafe fn load(ptr: *const u8) -> Self {
53
0
        Group(x86::_mm_loadu_si128(ptr.cast()))
54
0
    }
55
56
    /// Loads a group of bytes starting at the given address, which must be
57
    /// aligned to `mem::align_of::<Group>()`.
58
    #[inline]
59
    #[allow(clippy::cast_ptr_alignment)]
60
0
    pub(crate) unsafe fn load_aligned(ptr: *const u8) -> Self {
61
        // FIXME: use align_offset once it stabilizes
62
0
        debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
63
0
        Group(x86::_mm_load_si128(ptr.cast()))
64
0
    }
65
66
    /// Stores the group of bytes to the given address, which must be
67
    /// aligned to `mem::align_of::<Group>()`.
68
    #[inline]
69
    #[allow(clippy::cast_ptr_alignment)]
70
0
    pub(crate) unsafe fn store_aligned(self, ptr: *mut u8) {
71
        // FIXME: use align_offset once it stabilizes
72
0
        debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
73
0
        x86::_mm_store_si128(ptr.cast(), self.0);
74
0
    }
75
76
    /// Returns a `BitMask` indicating all bytes in the group which have
77
    /// the given value.
78
    #[inline]
79
0
    pub(crate) fn match_byte(self, byte: u8) -> BitMask {
80
        #[allow(
81
            clippy::cast_possible_wrap, // byte: u8 as i8
82
            // byte: i32 as u16
83
            //   note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the
84
            //   upper 16-bits of the i32 are zeroed:
85
            clippy::cast_sign_loss,
86
            clippy::cast_possible_truncation
87
        )]
88
        unsafe {
89
0
            let cmp = x86::_mm_cmpeq_epi8(self.0, x86::_mm_set1_epi8(byte as i8));
90
0
            BitMask(x86::_mm_movemask_epi8(cmp) as u16)
91
        }
92
0
    }
93
94
    /// Returns a `BitMask` indicating all bytes in the group which are
95
    /// `EMPTY`.
96
    #[inline]
97
0
    pub(crate) fn match_empty(self) -> BitMask {
98
0
        self.match_byte(EMPTY)
99
0
    }
100
101
    /// Returns a `BitMask` indicating all bytes in the group which are
102
    /// `EMPTY` or `DELETED`.
103
    #[inline]
104
0
    pub(crate) fn match_empty_or_deleted(self) -> BitMask {
105
        #[allow(
106
            // byte: i32 as u16
107
            //   note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the
108
            //   upper 16-bits of the i32 are zeroed:
109
            clippy::cast_sign_loss,
110
            clippy::cast_possible_truncation
111
        )]
112
        unsafe {
113
            // A byte is EMPTY or DELETED iff the high bit is set
114
0
            BitMask(x86::_mm_movemask_epi8(self.0) as u16)
115
        }
116
0
    }
117
118
    /// Returns a `BitMask` indicating all bytes in the group which are full.
119
    #[inline]
120
0
    pub(crate) fn match_full(&self) -> BitMask {
121
0
        self.match_empty_or_deleted().invert()
122
0
    }
123
124
    /// Performs the following transformation on all bytes in the group:
125
    /// - `EMPTY => EMPTY`
126
    /// - `DELETED => EMPTY`
127
    /// - `FULL => DELETED`
128
    #[inline]
129
0
    pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self {
130
        // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111
131
        // and high_bit = 0 (FULL) to 1000_0000
132
        //
133
        // Here's this logic expanded to concrete values:
134
        //   let special = 0 > byte = 1111_1111 (true) or 0000_0000 (false)
135
        //   1111_1111 | 1000_0000 = 1111_1111
136
        //   0000_0000 | 1000_0000 = 1000_0000
137
        #[allow(
138
            clippy::cast_possible_wrap, // byte: 0x80_u8 as i8
139
        )]
140
        unsafe {
141
0
            let zero = x86::_mm_setzero_si128();
142
0
            let special = x86::_mm_cmpgt_epi8(zero, self.0);
143
0
            Group(x86::_mm_or_si128(
144
0
                special,
145
0
                x86::_mm_set1_epi8(0x80_u8 as i8),
146
0
            ))
147
        }
148
0
    }
149
}