/rust/registry/src/index.crates.io-1949cf8c6b5b557f/image-webp-0.2.4/src/alpha_blending.rs
Line  | Count  | Source  | 
1  |  | //! Optimized alpha blending routines based on libwebp  | 
2  |  | //!  | 
3  |  | //! <https://github.com/webmproject/libwebp/blob/e4f7a9f0c7c9fbfae1568bc7fa5c94b989b50872/src/demux/anim_decode.c#L215-L267>  | 
4  |  |  | 
/// Returns the bit offset of the `i`-th 8-bit channel inside a packed `u32` pixel.
///
/// Channel 0 sits in the lowest byte, so the offset is `i` times the width of one byte.
const fn channel_shift(i: u32) -> u32 {
    u8::BITS * i
}
8  |  |  | 
/// Blends one 8-bit channel of `src` over the same channel of `dst`.
///
/// Both pixels are treated as NOT pre-multiplied by alpha.
///
/// * `src`, `dst` - packed pixels; `shift` is the bit offset of the channel to blend.
/// * `src_a`, `dst_a` - the weights applied to the source and destination channel values.
/// * `scale` - fixed-point factor applied to the weighted sum (the caller in this file passes
///   `(1 << 24) / blended_alpha`).
///
/// Returns the weighted sum multiplied by `scale`, with the low 24 bits dropped and the rest
/// truncated to `u8`.
fn blend_channel_nonpremult(
    src: u32,
    src_a: u8,
    dst: u32,
    dst_a: u8,
    scale: u32,
    shift: u32,
) -> u8 {
    // Number of bits dropped after scaling; this is the value `channel_shift(3)` evaluates to.
    const SCALE_BITS: u32 = 3 * 8;

    // `as u8` keeps only the low byte, i.e. the channel that `shift` moved into place.
    let channel_of = |pixel: u32| u32::from((pixel >> shift) as u8);

    let weighted_src = channel_of(src) * u32::from(src_a);
    let weighted_dst = channel_of(dst) * u32::from(dst_a);
    let weighted_sum = weighted_src + weighted_dst;

    // The multiplication by `scale` below has to fit in 32 bits.
    debug_assert!(u64::from(weighted_sum) < (1u64 << 32) / u64::from(scale));
    let scaled = weighted_sum * scale;
    (scaled >> SCALE_BITS) as u8
}
26  |  |  | 
27  |  | /// Blend `src` over `dst` assuming they are NOT pre-multiplied by alpha.  | 
28  | 2.20M  | fn blend_pixel_nonpremult(src: u32, dst: u32) -> u32 { | 
29  | 2.20M  |     let src_a = ((src >> channel_shift(3)) & 0xff) as u8;  | 
30  |  |  | 
31  | 2.20M  |     if src_a == 0 { | 
32  | 1.21M  |         dst  | 
33  |  |     } else { | 
34  | 989k  |         let dst_a = ((dst >> channel_shift(3)) & 0xff) as u8;  | 
35  |  |         // Approximate integer arithmetic for: dst_factor_a = (dst_a * (255 - src_a)) / 255  | 
36  |  |         // libwebp used the following formula here:  | 
37  |  |         //let dst_factor_a = (dst_a as u32 * (256 - src_a as u32)) >> 8;  | 
38  |  |         // however, we've found that we can use a more precise approximation without losing performance:  | 
39  | 989k  |         let dst_factor_a = div_by_255(u32::from(dst_a) * (255 - u32::from(src_a)));  | 
40  | 989k  |         let blend_a = u32::from(src_a) + dst_factor_a;  | 
41  | 989k  |         let scale = (1u32 << 24) / blend_a;  | 
42  |  |  | 
43  | 989k  |         let blend_r =  | 
44  | 989k  |             blend_channel_nonpremult(src, src_a, dst, dst_factor_a as u8, scale, channel_shift(0));  | 
45  | 989k  |         let blend_g =  | 
46  | 989k  |             blend_channel_nonpremult(src, src_a, dst, dst_factor_a as u8, scale, channel_shift(1));  | 
47  | 989k  |         let blend_b =  | 
48  | 989k  |             blend_channel_nonpremult(src, src_a, dst, dst_factor_a as u8, scale, channel_shift(2));  | 
49  | 989k  |         debug_assert!(u32::from(src_a) + dst_factor_a < 256);  | 
50  |  |  | 
51  | 989k  |         (u32::from(blend_r) << channel_shift(0))  | 
52  | 989k  |             | (u32::from(blend_g) << channel_shift(1))  | 
53  | 989k  |             | (u32::from(blend_b) << channel_shift(2))  | 
54  | 989k  |             | (blend_a << channel_shift(3))  | 
55  |  |     }  | 
56  | 2.20M  | }  | 
57  |  |  | 
58  | 2.20M  | pub(crate) fn do_alpha_blending(buffer: [u8; 4], canvas: [u8; 4]) -> [u8; 4] { | 
59  |  |     // The original C code contained different shift functions for different endianness,  | 
60  |  |     // but they didn't work when ported to Rust directly (and probably didn't work in C either).  | 
61  |  |     // So instead we reverse the order of bytes on big-endian here, at the interface.  | 
62  |  |     // `from_le_bytes` is a no-op on little endian (most systems) and a cheap shuffle on big endian.  | 
63  | 2.20M  |     blend_pixel_nonpremult(u32::from_le_bytes(buffer), u32::from_le_bytes(canvas)).to_le_bytes()  | 
64  | 2.20M  | }  | 
65  |  |  | 
/// Divides `v` by 255, rounding to the nearest integer instead of rounding down like the
/// built-in integer division.
///
/// TODO: for the inputs used in this file the result never reaches 256, so the output is
/// effectively a `u8`. Plumb that through the code.
//
// Sources:
// https://arxiv.org/pdf/2202.02864
// https://github.com/image-rs/image-webp/issues/119#issuecomment-2544007820
#[inline]
const fn div_by_255(v: u32) -> u32 {
    // Adding half of 256 up front is what turns the truncating shifts into rounding.
    let biased = v + 0x80;
    ((biased >> 8) + biased) >> 8
}
76  |  |  | 
#[cfg(test)]
mod tests {
    use super::*;

    /// Floating-point blend of `buffer` over `canvas`, used as the ground truth for the
    /// optimized integer implementation.
    ///
    /// Every result is converted back with `as u8`, which drops the fractional part.
    fn do_alpha_blending_reference(buffer: [u8; 4], canvas: [u8; 4]) -> [u8; 4] {
        let src_alpha = f64::from(buffer[3]);
        let dst_alpha = f64::from(canvas[3]);
        // Fraction of the canvas that remains visible underneath the buffer pixel.
        let dst_visible = 1.0 - src_alpha / 255.0;
        let out_alpha = src_alpha + dst_alpha * dst_visible;
        // The value should be between 0 and 255; the cast truncates the fractional part.
        let blend_alpha = out_alpha as u8;

        if blend_alpha == 0 {
            return [0, 0, 0, blend_alpha];
        }

        // Blends a single colour channel; the cast truncates the fractional part.
        let blend = |src: u8, dst: u8| -> u8 {
            let mixed =
                (f64::from(src) * src_alpha + f64::from(dst) * dst_alpha * dst_visible) / out_alpha;
            mixed as u8
        };

        [
            blend(buffer[0], canvas[0]),
            blend(buffer[1], canvas[1]),
            blend(buffer[2], canvas[2]),
            blend_alpha,
        ]
    }

    /// Compares the optimized blend with the floating-point reference over a grid of first
    /// channel values and alphas. The ranges are half-open, so 255 itself is not exercised,
    /// and alphas start at 11.
    #[test]
    #[ignore] // takes too long to run on CI. Run this locally when changing the function.
    fn alpha_blending_optimization() {
        for r1 in 0..u8::MAX {
            for a1 in 11..u8::MAX {
                let top = [r1, 0, 0, a1];
                for r2 in 0..u8::MAX {
                    for a2 in 11..u8::MAX {
                        let bottom = [r2, 0, 0, a2];
                        let opt = do_alpha_blending(top, bottom);
                        let slow = do_alpha_blending_reference(top, bottom);
                        // libwebp doesn't do exact blending and so we don't either
                        let close_enough = opt.iter().zip(&slow).all(|(o, s)| o.abs_diff(*s) <= 3);
                        assert!(
                            close_enough,
                            "Mismatch in results! opt: {opt:?}, slow: {slow:?}, blended values: [{r1}, 0, 0, {a1}], [{r2}, 0, 0, {a2}]"
                        );
                    }
                }
            }
        }
    }
}