/rust/registry/src/index.crates.io-1949cf8c6b5b557f/image-webp-0.2.4/src/alpha_blending.rs
Line | Count | Source |
1 | | //! Optimized alpha blending routines based on libwebp |
2 | | //! |
3 | | //! <https://github.com/webmproject/libwebp/blob/e4f7a9f0c7c9fbfae1568bc7fa5c94b989b50872/src/demux/anim_decode.c#L215-L267> |
4 | | |
/// Returns the bit offset of the `i`-th 8-bit channel inside a packed 32-bit pixel.
const fn channel_shift(i: u32) -> u32 {
    const BITS_PER_CHANNEL: u32 = 8;
    i * BITS_PER_CHANNEL
}
8 | | |
9 | | /// Blend a single channel of `src` over `dst`, given their alpha channel values. |
10 | | /// `src` and `dst` are assumed to be NOT pre-multiplied by alpha. |
11 | 3.52M | fn blend_channel_nonpremult( |
12 | 3.52M | src: u32, |
13 | 3.52M | src_a: u8, |
14 | 3.52M | dst: u32, |
15 | 3.52M | dst_a: u8, |
16 | 3.52M | scale: u32, |
17 | 3.52M | shift: u32, |
18 | 3.52M | ) -> u8 { |
19 | 3.52M | let src_channel = ((src >> shift) & 0xff) as u8; |
20 | 3.52M | let dst_channel = ((dst >> shift) & 0xff) as u8; |
21 | 3.52M | let blend_unscaled = |
22 | 3.52M | (u32::from(src_channel) * u32::from(src_a)) + (u32::from(dst_channel) * u32::from(dst_a)); |
23 | 3.52M | debug_assert!(u64::from(blend_unscaled) < (1u64 << 32) / u64::from(scale)); |
24 | 3.52M | ((blend_unscaled * scale) >> channel_shift(3)) as u8 |
25 | 3.52M | } |
26 | | |
27 | | /// Blend `src` over `dst` assuming they are NOT pre-multiplied by alpha. |
28 | 2.51M | fn blend_pixel_nonpremult(src: u32, dst: u32) -> u32 { |
29 | 2.51M | let src_a = ((src >> channel_shift(3)) & 0xff) as u8; |
30 | | |
31 | 2.51M | if src_a == 0 { |
32 | 1.34M | dst |
33 | | } else { |
34 | 1.17M | let dst_a = ((dst >> channel_shift(3)) & 0xff) as u8; |
35 | | // Approximate integer arithmetic for: dst_factor_a = (dst_a * (255 - src_a)) / 255 |
36 | | // libwebp used the following formula here: |
37 | | //let dst_factor_a = (dst_a as u32 * (256 - src_a as u32)) >> 8; |
38 | | // however, we've found that we can use a more precise approximation without losing performance: |
39 | 1.17M | let dst_factor_a = div_by_255(u32::from(dst_a) * (255 - u32::from(src_a))); |
40 | 1.17M | let blend_a = u32::from(src_a) + dst_factor_a; |
41 | 1.17M | let scale = (1u32 << 24) / blend_a; |
42 | | |
43 | 1.17M | let blend_r = |
44 | 1.17M | blend_channel_nonpremult(src, src_a, dst, dst_factor_a as u8, scale, channel_shift(0)); |
45 | 1.17M | let blend_g = |
46 | 1.17M | blend_channel_nonpremult(src, src_a, dst, dst_factor_a as u8, scale, channel_shift(1)); |
47 | 1.17M | let blend_b = |
48 | 1.17M | blend_channel_nonpremult(src, src_a, dst, dst_factor_a as u8, scale, channel_shift(2)); |
49 | 1.17M | debug_assert!(u32::from(src_a) + dst_factor_a < 256); |
50 | | |
51 | 1.17M | (u32::from(blend_r) << channel_shift(0)) |
52 | 1.17M | | (u32::from(blend_g) << channel_shift(1)) |
53 | 1.17M | | (u32::from(blend_b) << channel_shift(2)) |
54 | 1.17M | | (blend_a << channel_shift(3)) |
55 | | } |
56 | 2.51M | } |
57 | | |
58 | 2.51M | pub(crate) fn do_alpha_blending(buffer: [u8; 4], canvas: [u8; 4]) -> [u8; 4] { |
59 | | // The original C code contained different shift functions for different endianness, |
60 | | // but they didn't work when ported to Rust directly (and probably didn't work in C either). |
61 | | // So instead we reverse the order of bytes on big-endian here, at the interface. |
62 | | // `from_le_bytes` is a no-op on little endian (most systems) and a cheap shuffle on big endian. |
63 | 2.51M | blend_pixel_nonpremult(u32::from_le_bytes(buffer), u32::from_le_bytes(canvas)).to_le_bytes() |
64 | 2.51M | } |
65 | | |
/// Divides by 255, rounding to nearest (as opposed to down, like regular integer division does).
/// TODO: cannot output 256, so the output is effectively u8. Plumb that through the code.
//
// Sources:
// https://arxiv.org/pdf/2202.02864
// https://github.com/image-rs/image-webp/issues/119#issuecomment-2544007820
#[inline]
const fn div_by_255(v: u32) -> u32 {
    // Adding 0x80 up front turns the truncating shifts below into round-to-nearest.
    let biased = v + 0x80;
    (biased + (biased >> 8)) >> 8
}
76 | | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Straightforward floating-point blend of `buffer` over `canvas`, used as the ground truth
    /// that the optimized integer routine is compared against.
    fn do_alpha_blending_reference(buffer: [u8; 4], canvas: [u8; 4]) -> [u8; 4] {
        let canvas_alpha = f64::from(canvas[3]);
        let buffer_alpha = f64::from(buffer[3]);
        let blend_alpha_f64 = buffer_alpha + canvas_alpha * (1.0 - buffer_alpha / 255.0);
        // The value lies between 0 and 255; the cast truncates the fractional part.
        let blend_alpha = blend_alpha_f64 as u8;

        // Color channels stay zero when the blended pixel is fully transparent.
        let mut blended = [0, 0, 0, blend_alpha];
        if blend_alpha != 0 {
            let color_channels = buffer.iter().zip(canvas.iter()).take(3);
            for (out, (&front, &back)) in blended.iter_mut().zip(color_channels) {
                let weighted = f64::from(front) * buffer_alpha
                    + f64::from(back) * canvas_alpha * (1.0 - buffer_alpha / 255.0);
                // The value lies between 0 and 255; the cast truncates the fractional part.
                *out = (weighted / blend_alpha_f64) as u8;
            }
        }

        blended
    }

    /// Sweeps one color channel and both alpha values, checking that the optimized blend stays
    /// within 3 units of the floating-point reference on every output byte.
    ///
    /// Note that the ranges are half-open, so the value 255 is never exercised, and the alpha
    /// values start at 11.
    #[test]
    #[ignore] // takes too long to run on CI. Run this locally when changing the function.
    fn alpha_blending_optimization() {
        for r1 in 0..u8::MAX {
            for a1 in 11..u8::MAX {
                let front = [r1, 0, 0, a1];
                for r2 in 0..u8::MAX {
                    for a2 in 11..u8::MAX {
                        let back = [r2, 0, 0, a2];
                        let opt = do_alpha_blending(front, back);
                        let slow = do_alpha_blending_reference(front, back);
                        // libwebp doesn't do exact blending and so we don't either
                        for (fast_byte, exact_byte) in opt.iter().zip(slow.iter()) {
                            let error = fast_byte.abs_diff(*exact_byte);
                            assert!(
                                error <= 3,
                                "Mismatch in results! opt: {opt:?}, slow: {slow:?}, blended values: [{r1}, 0, 0, {a1}], [{r2}, 0, 0, {a2}]"
                            );
                        }
                    }
                }
            }
        }
    }
}