Coverage Report

Created: 2026-03-10 07:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/jpeg-encoder-0.7.0/src/avx2/ycbcr.rs
Line
Count
Source
1
#[cfg(target_arch = "x86")]
2
use core::arch::x86::{
3
    __m256i, _mm256_add_epi32, _mm256_mullo_epi32, _mm256_set_epi32, _mm256_set1_epi32,
4
    _mm256_srli_epi32, _mm256_sub_epi32,
5
};
6
7
#[cfg(target_arch = "x86_64")]
8
use core::arch::x86_64::{
9
    __m256i, _mm256_add_epi32, _mm256_mullo_epi32, _mm256_set_epi32, _mm256_set1_epi32,
10
    _mm256_srli_epi32, _mm256_sub_epi32,
11
};
12
13
use alloc::vec::Vec;
14
15
use crate::{ImageBuffer, JpegColorType, rgb_to_ycbcr};
16
17
/// Generates an AVX2-accelerated image wrapper type that converts interleaved
/// 8-bit pixels to separate Y, Cb and Cr planes.
///
/// * `$name` - name of the generated tuple struct `(pixel data, width, height)`.
/// * `$num_colors` - number of bytes per pixel in the input slice.
/// * `$o1`, `$o2`, `$o3` - byte offsets of the red, green and blue samples
///   inside one pixel.
macro_rules! ycbcr_image_avx2 {
    ($name:ident, $num_colors:expr, $o1:expr, $o2:expr, $o3:expr) => {
        /// Borrowed interleaved pixel data (field 0) with its width (field 1)
        /// and height (field 2) in pixels.
        pub struct $name<'a>(pub &'a [u8], pub u16, pub u16);

        impl<'a> $name<'a> {
            /// Converts row `y` to Y/Cb/Cr and appends one byte per pixel to
            /// `buffers[0]`, `buffers[1]` and `buffers[2]`; `buffers[3]` is not touched.
            ///
            /// Pixels are processed eight at a time with AVX2; the `width % 8`
            /// trailing pixels go through the scalar `rgb_to_ycbcr`.
            ///
            /// # Panics
            ///
            /// Panics if the pixel slice is too short for the requested row.
            #[target_feature(enable = "avx2")]
            fn fill_buffers_avx2(&self, y: u16, buffers: &mut [Vec<u8>; 4]) {
                // TODO: this compiles to many separate scalar loads and could be optimized further.
                // But the gains are no more than 3% end to end and it doesn't seem to be worth the complexity.
                /// Gathers one channel of eight consecutive pixels into 32-bit lanes.
                ///
                /// `_mm256_set_epi32` takes its arguments highest lane first, so
                /// pixel 0 lands in lane 7 and pixel 7 in lane 0.
                #[inline]
                #[target_feature(enable = "avx2")]
                fn load3(px: &[u8]) -> __m256i {
                    // Touch the largest index first so the remaining accesses need no bounds checks.
                    _ = px[7 * $num_colors];
                    let sample = |i: usize| px[i * $num_colors] as i32;
                    _mm256_set_epi32(
                        sample(0),
                        sample(1),
                        sample(2),
                        sample(3),
                        sample(4),
                        sample(5),
                        sample(6),
                        sample(7),
                    )
                }

                /// Reinterprets a 256-bit vector as its eight 32-bit lanes (lane 0 first).
                #[inline]
                #[target_feature(enable = "avx2")]
                fn avx_as_i32_array(lanes: __m256i) -> [i32; 8] {
                    // Safety preconditions. Optimized away in release mode, no runtime cost.
                    assert!(core::mem::size_of::<__m256i>() == core::mem::size_of::<[i32; 8]>());
                    assert!(core::mem::align_of::<__m256i>() >= core::mem::align_of::<[i32; 8]>());
                    // SAFETY: size and alignment preconditions checked above.
                    // Both types are plain old data: no pointers, lifetimes, etc.
                    unsafe { core::mem::transmute(lanes) }
                }

                /// Drops the 16 fractional bits of every lane, truncates each lane
                /// to a byte and appends the eight bytes to `plane` in pixel order.
                #[inline]
                #[target_feature(enable = "avx2")]
                fn push_lanes(fixed: __m256i, plane: &mut Vec<u8>) {
                    let lanes = avx_as_i32_array(_mm256_srli_epi32(fixed, 16));
                    // Pixel 0 sits in the highest lane (see `load3`), so read the lanes back to front.
                    let bytes: [u8; 8] = core::array::from_fn(|i| lanes[7 - i] as u8);
                    plane.extend_from_slice(&bytes);
                }

                let width = self.width();
                let [luma_plane, cb_plane, cr_plane, _] = buffers;
                luma_plane.reserve(width as usize);
                cb_plane.reserve(width as usize);
                cr_plane.reserve(width as usize);

                // Conversion weights in 16.16 fixed point, one set per output channel.
                let luma_r = _mm256_set1_epi32(19595);
                let luma_g = _mm256_set1_epi32(38470);
                let luma_b = _mm256_set1_epi32(7471);

                let cb_r = _mm256_set1_epi32(-11059);
                let cb_g = _mm256_set1_epi32(21709);
                let cb_b = _mm256_set1_epi32(32768);

                let cr_r = _mm256_set1_epi32(32768);
                let cr_g = _mm256_set1_epi32(27439);
                let cr_b = _mm256_set1_epi32(5329);

                // Added before the shift so the result is rounded instead of truncated.
                let rounding = _mm256_set1_epi32(0x7FFF);
                // Centres both chroma channels on 128.
                let chroma_bias = _mm256_set1_epi32(128 << 16);

                let row_start = y as usize * self.1 as usize * $num_colors;
                let mut rest = &self.0[row_start..];

                // Vector part: eight pixels per iteration.
                for _ in 0..width / 8 {
                    let r = load3(&rest[$o1..]);
                    let g = load3(&rest[$o2..]);
                    let b = load3(&rest[$o3..]);
                    rest = &rest[($num_colors * 8)..];

                    let luma = _mm256_add_epi32(
                        _mm256_add_epi32(
                            _mm256_mullo_epi32(luma_r, r),
                            _mm256_mullo_epi32(luma_g, g),
                        ),
                        _mm256_mullo_epi32(luma_b, b),
                    );
                    push_lanes(_mm256_add_epi32(luma, rounding), luma_plane);

                    let cb = _mm256_add_epi32(
                        _mm256_sub_epi32(
                            _mm256_mullo_epi32(cb_r, r),
                            _mm256_mullo_epi32(cb_g, g),
                        ),
                        _mm256_mullo_epi32(cb_b, b),
                    );
                    let cb = _mm256_add_epi32(cb, chroma_bias);
                    push_lanes(_mm256_add_epi32(cb, rounding), cb_plane);

                    let cr = _mm256_sub_epi32(
                        _mm256_sub_epi32(
                            _mm256_mullo_epi32(cr_r, r),
                            _mm256_mullo_epi32(cr_g, g),
                        ),
                        _mm256_mullo_epi32(cr_b, b),
                    );
                    let cr = _mm256_add_epi32(cr, chroma_bias);
                    push_lanes(_mm256_add_epi32(cr, rounding), cr_plane);
                }

                // Scalar tail: the pixels that do not fill a whole vector.
                for _ in 0..width % 8 {
                    let (luma, cb, cr) = rgb_to_ycbcr(rest[$o1], rest[$o2], rest[$o3]);
                    rest = &rest[$num_colors..];

                    luma_plane.push(luma);
                    cb_plane.push(cb);
                    cr_plane.push(cr);
                }
            }
        }

        impl<'a> ImageBuffer for $name<'a> {
            fn get_jpeg_color_type(&self) -> JpegColorType {
                JpegColorType::Ycbcr
            }

            fn width(&self) -> u16 {
                self.1
            }

            fn height(&self) -> u16 {
                self.2
            }

            #[inline(always)]
            fn fill_buffers(&self, y: u16, buffers: &mut [Vec<u8>; 4]) {
                // SAFETY: sound only if the running CPU supports AVX2.
                // NOTE(review): nothing in this block checks that; presumably whoever
                // constructs this type has already done the feature detection - confirm.
                unsafe {
                    self.fill_buffers_avx2(y, buffers);
                }
            }
        }
    };
}
151
152
// RGB input: 3 bytes per pixel; red, green and blue are read from byte offsets 0, 1 and 2.
ycbcr_image_avx2!(RgbImageAVX2, 3, 0, 1, 2);
153
// RGBA input: 4 bytes per pixel; red, green and blue are read from byte offsets 0, 1 and 2.
// The fourth byte of each pixel (alpha, going by the type name) is never read.
ycbcr_image_avx2!(RgbaImageAVX2, 4, 0, 1, 2);
154
// BGR input: 3 bytes per pixel; red, green and blue are read from byte offsets 2, 1 and 0.
ycbcr_image_avx2!(BgrImageAVX2, 3, 2, 1, 0);
155
// BGRA input: 4 bytes per pixel; red, green and blue are read from byte offsets 2, 1 and 0.
// The fourth byte of each pixel (alpha, going by the type name) is never read.
ycbcr_image_avx2!(BgraImageAVX2, 4, 2, 1, 0);
156
157
#[cfg(test)]
158
mod tests {
159
    use super::*;
160
    use std::vec::Vec;
161
162
    /// A very basic linear congruential generator (LCG) to avoid external dependencies.
    ///
    /// Fully deterministic for a given seed. Only meant for producing test data.
    pub struct SimpleRng {
        state: u64,
    }

    impl SimpleRng {
        /// Create a new RNG with a given seed.
        pub fn new(seed: u64) -> Self {
            Self { state: seed }
        }

        /// Advance the generator one step and return the new 64-bit state.
        pub fn next_u64(&mut self) -> u64 {
            // state = state * A + 1 (mod 2^64). The multiplier is the one used by
            // Knuth's MMIX generator; with an odd increment the state has the full
            // 2^64 period.
            self.state = self.state.wrapping_mul(6364136223846793005).wrapping_add(1);
            self.state
        }

        /// Generate a random byte in 0..=255
        pub fn next_byte(&mut self) -> u8 {
            // Use the most significant byte. The low 8 bits of a power-of-two-modulus
            // LCG obey their own mod-256 recurrence and repeat every 256 steps, which
            // would make any buffer longer than 256 bytes a repetition of its start.
            (self.next_u64() >> 56) as u8
        }

        /// Fill a Vec<u8> with random bytes of the given length.
        pub fn random_bytes(&mut self, len: usize) -> Vec<u8> {
            (0..len).map(|_| self.next_byte()).collect()
        }
    }
190
191
    /// The AVX2 RGB conversion must produce exactly the same Y/Cb/Cr bytes as
    /// the scalar `rgb_to_ycbcr` for every pixel of a pseudo-random row.
    #[test]
    #[cfg(feature = "simd")]
    fn avx_matches_scalar_rgb() {
        // Do not run AVX2 test on machines without it
        if !std::is_x86_feature_detected!("avx2") {
            return;
        }

        const BYTES_PER_PIXEL: usize = 3;
        // Power of two plus a bit, so that the remainder handling after the
        // 8-pixel vector loop is exercised as well.
        let width: usize = 512 + 3;
        let height: usize = 1;

        let input = SimpleRng::new(42).random_bytes(width * height * BYTES_PER_PIXEL);

        // Reference result: one [y, cb, cr] triple per input pixel.
        let scalar_result: Vec<[u8; 3]> = input
            .chunks_exact(BYTES_PER_PIXEL)
            .map(|rgb| {
                let (y, cb, cr) = rgb_to_ycbcr(rgb[0], rgb[1], rgb[2]);
                [y, cb, cr]
            })
            .collect();

        let mut buffers = [Vec::new(), Vec::new(), Vec::new(), Vec::new()];
        let avx_input = RgbImageAVX2(
            &input,
            width.try_into().unwrap(),
            height.try_into().unwrap(),
        );
        // SAFETY: we've checked above that AVX2 is present
        unsafe {
            avx_input.fill_buffers_avx2(0, &mut buffers);
        }

        // Each of the three planes must hold exactly one byte per pixel.
        for plane in &buffers[..3] {
            assert_eq!(plane.len(), input.len() / 3);
        }

        for (i, pixel) in scalar_result.iter().copied().enumerate() {
            let avx_pixel: [u8; 3] = [buffers[0][i], buffers[1][i], buffers[2][i]];
            assert!(
                pixel == avx_pixel,
                "Mismatch at index {i}: scalar result is {pixel:?}, avx result is {avx_pixel:?}"
            );
        }
    }
238
}