/rust/registry/src/index.crates.io-1949cf8c6b5b557f/zune-jpeg-0.4.21/src/idct.rs

Source
/*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */

//! Routines for IDCT
//!
//! Essentially we provide 2 routines for IDCT: a scalar implementation and a not-super-optimized
//! AVX2 one. I'll talk about them here.
//!
//! There are 2 reasons why we have the avx one
//! 1. No one compiles with `-C target-feature=+avx2`, hence binaries probably won't take advantage
//!    of AVX2 (even if the CPU supports it).
//! 2. AVX employs zero short circuit in a way the scalar code cannot employ it.
//!     - AVX does this by checking for MCU's whose 63 AC coefficients are zero and if true, it writes
//!        values directly, if false, it goes the long way of calculating.
//!     -   Although this can be trivially implemented in the scalar version, it generates code
//!         I'm not happy with (the scalar version basically loops, and that is too many branches for me).
//!         The AVX one does a better job by using bitwise ORs (`_mm256_or_si256`), which is orders of magnitude faster
//!         than anything I could come up with.
//!
//! The AVX code also has some cool transpose_u16 instructions which look complicated enough to be cool
//! (spoiler alert: I barely understand how they work, that's why I credited the owner).
//!
#![allow(
    clippy::excessive_precision,
    clippy::unreadable_literal,
    clippy::module_name_repetitions,
    unused_parens,
    clippy::wildcard_imports
)]

use zune_core::log::debug;
use zune_core::options::DecoderOptions;

use crate::decoder::IDCTPtr;
use crate::idct::scalar::idct_int;

#[cfg(feature = "x86")]
pub mod avx2;
#[cfg(feature = "neon")]
pub mod neon;

pub mod scalar;

/// Pick the IDCT routine to use for the given decoder options.
///
/// Compile-time `cfg` gates decide which vector back ends are considered at
/// all; among those, `options` is consulted at runtime:
///
/// - x86 / x86_64 builds with the `x86` feature return the AVX2 routine when
///   `options.use_avx2()` is true.
/// - aarch64 builds with the `neon` feature return the NEON routine when
///   `options.use_neon()` is true.
///
/// In every other case the scalar integer routine is returned.
// `options` is never read when neither vector back end is compiled in.
#[allow(unused_variables)]
pub fn choose_idct_func(options: &DecoderOptions) -> IDCTPtr {
    #[cfg(all(feature = "x86", any(target_arch = "x86", target_arch = "x86_64")))]
    {
        if options.use_avx2() {
            debug!("Using vector integer IDCT");
            return crate::idct::avx2::idct_avx2;
        }
    }

    #[cfg(all(feature = "neon", target_arch = "aarch64"))]
    {
        if options.use_neon() {
            debug!("Using vector integer IDCT");
            return crate::idct::neon::idct_neon;
        }
    }

    // No vector routine was selected: fall back to the generic implementation.
    debug!("Using scalar integer IDCT");
    idct_int
}

#[cfg(test)]
// The trailing `idct_int` in `idct_fnc` becomes unreachable whenever one of the
// cfg-gated blocks above it is compiled in and returns first.
#[allow(unreachable_code)]
#[allow(dead_code)]
mod tests {
    use super::*;

    /// Routine whose output each test compares against `idct_int`.
    ///
    /// - aarch64 with the `neon` feature: the NEON routine.
    /// - x86 / x86_64 with the `x86` feature: the AVX2 routine.
    /// - anything else: `idct_int` itself, so the comparison is scalar vs scalar.
    ///
    /// NOTE(review): the vector routine is returned unconditionally here,
    /// whereas `choose_idct_func` first consults `options.use_avx2()` /
    /// `options.use_neon()`. Presumably the tests assume the host supports the
    /// instruction set — confirm.
    fn idct_fnc() -> IDCTPtr {
        #[cfg(all(feature = "neon", target_arch = "aarch64"))]
        {
            return crate::idct::neon::idct_neon;
        }

        #[cfg(all(feature = "x86", any(target_arch = "x86", target_arch = "x86_64")))]
        {
            return crate::idct::avx2::idct_avx2;
        }

        idct_int
    }

    /// All 64 coefficients set to 10.
    #[test]
    fn idct_test0() {
        let mut vector_in = [10; 64];
        let mut scalar_in = vector_in;
        let mut vector_out = [0; 64];
        let mut scalar_out = [0; 64];
        let stride = 8;

        idct_fnc()(&mut vector_in, &mut vector_out, stride);
        idct_int(&mut scalar_in, &mut scalar_out, stride);

        assert_eq!(scalar_out, vector_out, "IDCT and scalar do not match");
    }

    /// All 64 coefficients set to 14.
    #[test]
    fn do_idct_test1() {
        let mut vector_in = [14; 64];
        let mut scalar_in = vector_in;
        let mut vector_out = [0; 64];
        let mut scalar_out = [0; 64];
        let stride = 8;

        idct_fnc()(&mut vector_in, &mut vector_out, stride);
        idct_int(&mut scalar_in, &mut scalar_out, stride);

        assert_eq!(scalar_out, vector_out, "IDCT and scalar do not match");
    }

    /// Only the first (255) and last (-256) coefficients are non-zero.
    #[test]
    fn do_idct_test2() {
        let mut vector_in = [0; 64];
        vector_in[0] = 255;
        vector_in[63] = -256;
        // Each routine gets its own copy because both take the input by `&mut`.
        let mut scalar_in = vector_in;
        let mut vector_out = [0; 64];
        let mut scalar_out = [0; 64];
        let stride = 8;

        idct_fnc()(&mut vector_in, &mut vector_out, stride);
        idct_int(&mut scalar_in, &mut scalar_out, stride);

        assert_eq!(scalar_out, vector_out, "IDCT and scalar do not match");
    }

    /// All-zero input block.
    #[test]
    fn do_idct_zeros() {
        let mut vector_in = [0; 64];
        let mut scalar_in = vector_in;
        let mut vector_out = [0; 64];
        let mut scalar_out = [0; 64];
        let stride = 8;

        idct_fnc()(&mut vector_in, &mut vector_out, stride);
        idct_int(&mut scalar_in, &mut scalar_out, stride);

        assert_eq!(scalar_out, vector_out, "IDCT and scalar do not match");
    }
}

Coverage Report

Created: 2025-11-11 07:15

Line	Count	Source
1		/*
2		* Copyright (c) 2023.
3		*
4		* This software is free software;
5		*
6		* You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
7		*/
8
9		//! Routines for IDCT
10		//!
11		//! Essentially we provide 2 routines for IDCT, a scalar implementation and a not super optimized
12		//! AVX2 one, i'll talk about them here.
13		//!
14		//! There are 2 reasons why we have the avx one
15		//! 1. No one compiles with -C target-features=avx2 hence binaries won't probably take advantage(even
16		//! if it exists).
17		//! 2. AVX employs zero short circuit in a way the scalar code cannot employ it.
18		//! - AVX does this by checking for MCU's whose 63 AC coefficients are zero and if true, it writes
19		//! values directly, if false, it goes the long way of calculating.
20		//! - Although this can be trivially implemented in the scalar version, it generates code
21		//! I'm not happy width(scalar version that basically loops and that is too many branches for me)
22		//! The avx one does a better job of using bitwise or's with (`_mm256_or_si256`) which is magnitudes of faster
23		//! than anything I could come up with
24		//!
25		//! The AVX code also has some cool transpose_u16 instructions which look so complicated to be cool
26		//! (spoiler alert, i barely understand how it works, that's why I credited the owner).
27		//!
28		#![allow(
29		clippy::excessive_precision,
30		clippy::unreadable_literal,
31		clippy::module_name_repetitions,
32		unused_parens,
33		clippy::wildcard_imports
34		)]
35
36		use zune_core::log::debug;
37		use zune_core::options::DecoderOptions;
38
39		use crate::decoder::IDCTPtr;
40		use crate::idct::scalar::idct_int;
41
42		#[cfg(feature = "x86")]
43		pub mod avx2;
44		#[cfg(feature = "neon")]
45		pub mod neon;
46
47		pub mod scalar;
48
49		/// Choose an appropriate IDCT function
50		#[allow(unused_variables)]
51	5.19k	pub fn choose_idct_func(options: &DecoderOptions) -> IDCTPtr {
52		#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
53		#[cfg(feature = "x86")]
54		{
55	5.19k	if options.use_avx2() {
56		debug!("Using vector integer IDCT");
57		// use avx one
58	5.19k	return crate::idct::avx2::idct_avx2;
59	0	}
60		}
61		#[cfg(target_arch = "aarch64")]
62		#[cfg(feature = "neon")]
63		{
64		if options.use_neon() {
65		debug!("Using vector integer IDCT");
66		return crate::idct::neon::idct_neon;
67		}
68		}
69		debug!("Using scalar integer IDCT");
70		// use generic one
71	0	return idct_int;
72	5.19k	}
73
74		#[cfg(test)]
75		#[allow(unreachable_code)]
76		#[allow(dead_code)]
77		mod tests {
78		use super::*;
79
80		#[test]
81		fn idct_test0() {
82		let stride = 8;
83		let mut coeff = [10; 64];
84		let mut coeff2 = [10; 64];
85		let mut output_scalar = [0; 64];
86		let mut output_vector = [0; 64];
87		idct_fnc()(&mut coeff, &mut output_vector, stride);
88		idct_int(&mut coeff2, &mut output_scalar, stride);
89		assert_eq!(output_scalar, output_vector, "IDCT and scalar do not match");
90		}
91
92		#[test]
93		fn do_idct_test1() {
94		let stride = 8;
95		let mut coeff = [14; 64];
96		let mut coeff2 = [14; 64];
97		let mut output_scalar = [0; 64];
98		let mut output_vector = [0; 64];
99		idct_fnc()(&mut coeff, &mut output_vector, stride);
100		idct_int(&mut coeff2, &mut output_scalar, stride);
101		assert_eq!(output_scalar, output_vector, "IDCT and scalar do not match");
102		}
103
104		#[test]
105		fn do_idct_test2() {
106		let stride = 8;
107		let mut coeff = [0; 64];
108		coeff[0] = 255;
109		coeff[63] = -256;
110		let mut coeff2 = coeff;
111		let mut output_scalar = [0; 64];
112		let mut output_vector = [0; 64];
113		idct_fnc()(&mut coeff, &mut output_vector, stride);
114		idct_int(&mut coeff2, &mut output_scalar, stride);
115		assert_eq!(output_scalar, output_vector, "IDCT and scalar do not match");
116		}
117
118		#[test]
119		fn do_idct_zeros() {
120		let stride = 8;
121		let mut coeff = [0; 64];
122		let mut coeff2 = [0; 64];
123		let mut output_scalar = [0; 64];
124		let mut output_vector = [0; 64];
125		idct_fnc()(&mut coeff, &mut output_vector, stride);
126		idct_int(&mut coeff2, &mut output_scalar, stride);
127		assert_eq!(output_scalar, output_vector, "IDCT and scalar do not match");
128		}
129
130		fn idct_fnc() -> IDCTPtr {
131		#[cfg(feature = "neon")]
132		#[cfg(target_arch = "aarch64")]
133		{
134		use crate::idct::neon::idct_neon;
135		return idct_neon;
136		}
137
138		#[cfg(feature = "x86")]
139		#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
140		{
141		use crate::idct::avx2::idct_avx2;
142		return idct_avx2;
143		}
144
145		idct_int
146		}
147		}