/rust/registry/src/index.crates.io-1949cf8c6b5b557f/zune-jpeg-0.5.6/src/worker.rs

Source
/*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */

use alloc::format;
use core::convert::TryInto;

use zune_core::colorspace::ColorSpace;

use crate::color_convert::ycbcr_to_grayscale;
use crate::components::{Components, SampleRatios};
use crate::decoder::{ColorConvert16Ptr, MAX_COMPONENTS};
use crate::errors::DecodeErrors;

/// Rounded 8-bit multiply: scales the product of two `0..=255` values back into `0..=255`.
///
/// Uses only an add and two shifts instead of a division by 255.
///
/// Borrowed from stb
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
#[inline]
fn blinn_8x8(in_val: u8, y: u8) -> u8 {
    // widen first so the product cannot overflow
    let product = i32::from(in_val) * i32::from(y);
    // bias by half a step so the final shift rounds rather than floors
    let biased = product + 128;
    let folded = biased + (biased >> 8);

    (folded >> 8) as u8
}

#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
pub(crate) fn color_convert(
    unprocessed: &[&[i16]; MAX_COMPONENTS], color_convert_16: ColorConvert16Ptr,
    input_colorspace: ColorSpace, output_colorspace: ColorSpace, output: &mut [u8], width: usize,
    padded_width: usize
) -> Result<(), DecodeErrors> {
    if input_colorspace.num_components() == 3 && input_colorspace == output_colorspace {
        // sort things like RGB to RGB conversion
        copy_removing_padding(unprocessed, width, padded_width, output);
        return Ok(());
    }
    if input_colorspace.num_components() == 4 && input_colorspace == output_colorspace {
        copy_removing_padding_4x(unprocessed, width, padded_width, output);
        return Ok(());
    }
    // color convert
    match (input_colorspace, output_colorspace) {
        (ColorSpace::YCbCr | ColorSpace::Luma, ColorSpace::Luma) => {
            ycbcr_to_grayscale(unprocessed[0], width, padded_width, output);
        }
        (
            ColorSpace::YCbCr,
            ColorSpace::RGB | ColorSpace::RGBA | ColorSpace::BGR | ColorSpace::BGRA
        ) => {
            color_convert_ycbcr(
                unprocessed,
                width,
                padded_width,
                output_colorspace,
                color_convert_16,
                output
            );
        }
        (ColorSpace::YCCK, ColorSpace::RGB) => {
            color_convert_ycck_to_rgb::<3>(
                unprocessed,
                width,
                padded_width,
                output_colorspace,
                color_convert_16,
                output
            );
        }

        (ColorSpace::YCCK, ColorSpace::RGBA) => {
            color_convert_ycck_to_rgb::<4>(
                unprocessed,
                width,
                padded_width,
                output_colorspace,
                color_convert_16,
                output
            );
        }
        (ColorSpace::CMYK, ColorSpace::RGB) => {
            color_convert_cymk_to_rgb::<3>(unprocessed, width, padded_width, output);
        }
        (ColorSpace::CMYK, ColorSpace::RGBA) => {
            color_convert_cymk_to_rgb::<4>(unprocessed, width, padded_width, output);
        }
        (ColorSpace::MultiBand(n), _) => {
            if n.get() != 2 {
                return Err(DecodeErrors::Format(format!(
                    "Unknown multiband sample ({n}), please share sample"
                )));
            }
            copy_removing_padding_generic(
                unprocessed,
                width,
                padded_width,
                output,
                n.get() as usize
            );
        }
        (ColorSpace::Luma, ColorSpace::RGB) => {
            // duplicate the luma channel  three times to form RGB
            // Note, this may assume the direct conversion
            // from luma to RGB is by duplicating
            //
            // There may be a bit more complex ways
            // of doing it but won't get onto it
            convert_luma_to_rgb(unprocessed, width, padded_width, output)
        }
        (ColorSpace::Luma, ColorSpace::RGBA) => {
            // duplicate the luma channel  three times to form RGB
            // add 255 as alpha
            // Note, this may assume the direct conversion
            // from luma to RGB is by duplicating
            //
            // There may be a bit more complex ways
            // of doing it but won't get onto it
            convert_luma_to_rgba(unprocessed, width, padded_width, output)
        }

        // For the other components we do nothing(currently)
        _ => {
            let msg = format!(
                "Unimplemented colorspace mapping from {input_colorspace:?} to {output_colorspace:?}");

            return Err(DecodeErrors::Format(msg));
        }
    }
    Ok(())
}

/// Expand the luma plane into interleaved RGB by replicating each sample three times.
///
/// Rows of `padded_width` samples are read from `mcu_block[0]`; rows of `width` pixels
/// (`width * 3` bytes) are written to `output`. Samples past `width` in an input row are
/// padding and are dropped. Each sample is narrowed to `u8` with a truncating cast.
fn convert_luma_to_rgb(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
) {
    let luma_rows = mcu_block[0].chunks_exact(padded_width);

    for (rgb_row, luma_row) in output.chunks_exact_mut(width * 3).zip(luma_rows) {
        // zip stops after `width` pixels, which discards the row padding
        for (rgb, &luma) in rgb_row.chunks_exact_mut(3).zip(luma_row) {
            rgb.fill(luma as u8);
        }
    }
}
/// Expand the luma plane into interleaved RGBA: the sample is replicated into R, G and B
/// and alpha is set to 255.
///
/// Rows of `padded_width` samples are read from `mcu_block[0]`; rows of `width` pixels
/// (`width * 4` bytes) are written to `output`. Samples past `width` in an input row are
/// padding and are dropped. Each sample is narrowed to `u8` with a truncating cast.
fn convert_luma_to_rgba(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
) {
    let luma_rows = mcu_block[0].chunks_exact(padded_width);

    for (rgba_row, luma_row) in output.chunks_exact_mut(width * 4).zip(luma_rows) {
        // zip stops after `width` pixels, which discards the row padding
        for (rgba, &luma) in rgba_row.chunks_exact_mut(4).zip(luma_row) {
            let grey = luma as u8;
            rgba.copy_from_slice(&[grey, grey, grey, 255]);
        }
    }
}
/// Interleave the first three planes into `output` without any color conversion,
/// dropping the padding samples at the end of each input row.
///
/// Every plane is read in rows of `padded_width` samples; `output` is written in rows of
/// `width` three-byte pixels laid out as `[plane 0, plane 1, plane 2]`. Samples are
/// narrowed to `u8` with a truncating cast.
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
fn copy_removing_padding(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
) {
    let out_rows = output.chunks_exact_mut(width * 3);
    let rows_0 = mcu_block[0].chunks_exact(padded_width);
    let rows_1 = mcu_block[1].chunks_exact(padded_width);
    let rows_2 = mcu_block[2].chunks_exact(padded_width);

    for (((dst_row, row_0), row_1), row_2) in out_rows.zip(rows_0).zip(rows_1).zip(rows_2) {
        let samples = row_0.iter().zip(row_1).zip(row_2);

        // only `width` pixels exist in dst_row, so the padded tail is never visited
        for (dst, ((&s0, &s1), &s2)) in dst_row.chunks_exact_mut(3).zip(samples) {
            dst.copy_from_slice(&[s0 as u8, s1 as u8, s2 as u8]);
        }
    }
}
/// Interleave all four planes into `output` without any color conversion,
/// dropping the padding samples at the end of each input row.
///
/// Every plane is read in rows of `padded_width` samples; `output` is written in rows of
/// `width` four-byte pixels laid out as `[plane 0, plane 1, plane 2, plane 3]`. Samples
/// are narrowed to `u8` with a truncating cast.
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
fn copy_removing_padding_4x(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
) {
    let out_rows = output.chunks_exact_mut(width * 4);
    let rows_0 = mcu_block[0].chunks_exact(padded_width);
    let rows_1 = mcu_block[1].chunks_exact(padded_width);
    let rows_2 = mcu_block[2].chunks_exact(padded_width);
    let rows_3 = mcu_block[3].chunks_exact(padded_width);

    for ((((dst_row, row_0), row_1), row_2), row_3) in
        out_rows.zip(rows_0).zip(rows_1).zip(rows_2).zip(rows_3)
    {
        let samples = row_0.iter().zip(row_1).zip(row_2).zip(row_3);

        // only `width` pixels exist in dst_row, so the padded tail is never visited
        for (dst, (((&s0, &s1), &s2), &s3)) in dst_row.chunks_exact_mut(4).zip(samples) {
            dst.copy_from_slice(&[s0 as u8, s1 as u8, s2 as u8, s3 as u8]);
        }
    }
}
/// Interleave the first `channels` planes into `output` without color conversion,
/// dropping the padding samples at the end of each input row.
///
/// Only `channels == 2` is implemented; any other value hits `unreachable!()`, so the
/// caller must check the channel count first.
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
fn copy_removing_padding_generic(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8],
    channels: usize
) {
    // just do 2 for now
    if channels != 2 {
        unreachable!()
    }

    let out_rows = output.chunks_exact_mut(width * channels);
    let rows_0 = mcu_block[0].chunks_exact(padded_width);
    let rows_1 = mcu_block[1].chunks_exact(padded_width);

    for ((dst_row, row_0), row_1) in out_rows.zip(rows_0).zip(rows_1) {
        let samples = row_0.iter().zip(row_1);

        // only `width` pixels exist in dst_row, so the padded tail is never visited
        for (dst, (&s0, &s1)) in dst_row.chunks_exact_mut(2).zip(samples) {
            dst.copy_from_slice(&[s0 as u8, s1 as u8]);
        }
    }
}
/// Convert YCCK image to rgb
///
/// Works in two passes over `output`:
/// 1. the Y/Cb/Cr planes (`mcu_block[0..3]`) are converted in place by
///    [`color_convert_ycbcr`] using `output_colorspace`;
/// 2. every color channel of each pixel is inverted and multiplied by the matching
///    sample of the K plane (`mcu_block[3]`).
///
/// `NUM_COMPONENTS` is the number of bytes per output pixel (3 for RGB, 4 for RGBA) and
/// must agree with `output_colorspace`. Only the first three bytes of a pixel are
/// modified in the second pass; a fourth byte, if present, is left as written by the
/// first pass.
///
/// Rows of the K plane are `padded_width` samples long; rows of `output` are `width`
/// pixels long, so trailing padding samples are ignored.
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
fn color_convert_ycck_to_rgb<const NUM_COMPONENTS: usize>(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize,
    output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8]
) {
    color_convert_ycbcr(
        mcu_block,
        width,
        padded_width,
        output_colorspace,
        color_convert_16,
        output
    );
    // An output row is `width` pixels of NUM_COMPONENTS bytes each. The row length must
    // use NUM_COMPONENTS (not a fixed 3), otherwise RGBA rows drift out of step with the
    // rows of the K plane and with the pixel boundaries.
    for (pix_w, k_w) in output
        .chunks_exact_mut(width * NUM_COMPONENTS)
        .zip(mcu_block[3].chunks_exact(padded_width))
    {
        for (pix, k) in pix_w.chunks_exact_mut(NUM_COMPONENTS).zip(k_w) {
            let k = (*k) as u8;
            pix[0] = blinn_8x8(255 - pix[0], k);
            pix[1] = blinn_8x8(255 - pix[1], k);
            pix[2] = blinn_8x8(255 - pix[2], k);
        }
    }
}

/// Convert a planar CMYK image to interleaved RGB or RGBA.
///
/// Each color channel is the rounded 8-bit product of the matching C, M or Y sample
/// (`mcu_block[0..3]`) with the K sample (`mcu_block[3]`).
///
/// `NUM_COMPONENTS` is the number of bytes per output pixel: 3 for RGB, 4 for RGBA.
/// When a fourth byte is present it is set to 255 (fully opaque), matching what
/// `convert_luma_to_rgba` does for sources without an alpha channel.
///
/// Planes are read in rows of `padded_width` samples and `output` is written in rows of
/// `width` pixels, so trailing padding samples are ignored. Samples are narrowed to
/// `u8` with a truncating cast.
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
fn color_convert_cymk_to_rgb<const NUM_COMPONENTS: usize>(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
) {
    for ((((pix_w, c_w), m_w), y_w), k_w) in output
        .chunks_exact_mut(width * NUM_COMPONENTS)
        .zip(mcu_block[0].chunks_exact(padded_width))
        .zip(mcu_block[1].chunks_exact(padded_width))
        .zip(mcu_block[2].chunks_exact(padded_width))
        .zip(mcu_block[3].chunks_exact(padded_width))
    {
        // Pixels are NUM_COMPONENTS bytes wide; stepping by a fixed 3 would misalign
        // every RGBA pixel after the first one in the row.
        for ((((pix, c), m), y), k) in pix_w
            .chunks_exact_mut(NUM_COMPONENTS)
            .zip(c_w)
            .zip(m_w)
            .zip(y_w)
            .zip(k_w)
        {
            let c = *c as u8;
            let m = *m as u8;
            let y = *y as u8;
            let k = *k as u8;

            pix[0] = blinn_8x8(c, k);
            pix[1] = blinn_8x8(m, k);
            pix[2] = blinn_8x8(y, k);

            // the source has no alpha channel, so RGBA output is opaque
            if let Some(alpha) = pix.get_mut(3) {
                *alpha = 255;
            }
        }
    }
}

/// Do color-conversion for interleaved MCU
#[allow(
    clippy::similar_names,
    clippy::too_many_arguments,
    clippy::needless_pass_by_value,
    clippy::unwrap_used
)]
fn color_convert_ycbcr(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize,
    output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8]
) {
    let num_components = output_colorspace.num_components();

    let stride = width * num_components;
    // Allocate temporary buffer for small widths less than  16.
    let mut temp = [0; 64];
    // We need to chunk per width to ensure we can discard extra values at the end of the width.
    // Since the encoder may pad bits to ensure the width is a multiple of 8.
    for (((y_width, cb_width), cr_width), out) in mcu_block[0]
        .chunks_exact(padded_width)
        .zip(mcu_block[1].chunks_exact(padded_width))
        .zip(mcu_block[2].chunks_exact(padded_width))
        .zip(output.chunks_exact_mut(stride))
    {
        if width < 16 {
            // allocate temporary buffers for the values received from idct
            let mut y_out = [0; 16];
            let mut cb_out = [0; 16];
            let mut cr_out = [0; 16];
            // copy those small widths to that buffer
            y_out[0..y_width.len()].copy_from_slice(y_width);
            cb_out[0..cb_width.len()].copy_from_slice(cb_width);
            cr_out[0..cr_width.len()].copy_from_slice(cr_width);
            // we handle widths less than 16 a bit differently, allocating a temporary
            // buffer and writing to that and then flushing to the out buffer
            // because of the optimizations applied below,
            (color_convert_16)(&y_out, &cb_out, &cr_out, &mut temp, &mut 0);
            // copy to stride
            out[0..width * num_components].copy_from_slice(&temp[0..width * num_components]);
            // next
            continue;
        }

        // Chunk in outputs of 16 to pass to color_convert as an array of 16 i16's.
        for (((y, cb), cr), out_c) in y_width
            .chunks_exact(16)
            .zip(cb_width.chunks_exact(16))
            .zip(cr_width.chunks_exact(16))
            .zip(out.chunks_exact_mut(16 * num_components))
        {
            (color_convert_16)(
                y.try_into().unwrap(),
                cb.try_into().unwrap(),
                cr.try_into().unwrap(),
                out_c,
                &mut 0
            );
        }
        //we have more pixels in the end that can't be handled by the main loop.
        //move pointer back a little bit to get last 16 bytes,
        //color convert, and overwrite
        //This means some values will be color converted twice.
        for ((y, cb), cr) in y_width[width - 16..]
            .chunks_exact(16)
            .zip(cb_width[width - 16..].chunks_exact(16))
            .zip(cr_width[width - 16..].chunks_exact(16))
            .take(1)
        {
            (color_convert_16)(
                y.try_into().unwrap(),
                cb.try_into().unwrap(),
                cr.try_into().unwrap(),
                &mut temp,
                &mut 0
            );
        }

        let rem = out[(width - 16) * num_components..]
            .chunks_exact_mut(16 * num_components)
            .next()
            .unwrap();

        rem.copy_from_slice(&temp[0..rem.len()]);
    }
}
/// Upsample the rows currently held in `component.raw_coeff`, dispatching on
/// `component.sample_ratio`.
///
/// * `V` / `HV` - every `width_stride`-long row of `raw_coeff` is handed to
///   `component.up_sampler` together with a row above and a row below, and the result is
///   written to successive `stride_bytes_written`-sized slices of `upsample_dest`.
///   The final row of the block has no row below yet, so (unless `i` is the last MCU
///   row) it is copied to `component.row` and its predecessor to `component.row_up`;
///   the next call (`i > 0`) upsamples that saved row into `first_row_upsample_dest`.
/// * `H` - `upsample_dest` is resized to twice the length of `raw_coeff` and each row is
///   upsampled on its own into a row twice as long. When `has_vertical_sample` is set,
///   the last `first_row_upsample_dest.len()` values of `upsample_dest` are first saved
///   into `first_row_upsample_dest`.
/// * `Generic(h, v)` - each input row is upsampled into every `width_stride * h`-long
///   row of its `width_stride * h * v`-long slice of `upsample_dest`.
/// * `None` - nothing is done.
///
/// # Arguments
/// * `component` - component whose buffers are read and written.
/// * `mcu_height` - compared against `i` to detect the last MCU row
///   (presumably the number of MCU rows in the image - confirm against the caller).
/// * `i` - index of the MCU row being processed; `0` selects the first-image-row handling.
/// * `upsampler_scratch_space` - scratch buffer forwarded to the up-sampler in the
///   `V` / `HV` case; the other cases pass an empty slice instead.
/// * `has_vertical_sample` - only consulted in the `H` case.
pub(crate) fn upsample(
    component: &mut Components, mcu_height: usize, i: usize, upsampler_scratch_space: &mut [i16],
    has_vertical_sample: bool
) {
    match component.sample_ratio {
        SampleRatios::V | SampleRatios::HV => {
            /*
            When upsampling vertically sampled images, we have a certain problem
            which is that we do not have all MCU's decoded, this usually sucks at boundaries
            e.g we can't upsample the last mcu row, since the row_down currently doesn't exist

            To solve this we need to do two things

            1. Carry over coefficients when we lack enough data to upsample
            2. Upsample when we have enough data

            To achieve (1), we store a previous row, and the current row in components themselves
            which will later be used to make (2)

            To achieve (2), we take the stored previous row(second last MCU row),
            current row(last mcu row) and row down(first row of newly decoded MCU)

            and upsample that and store it in first_row_upsample_dest, this contains
            up-sampled coefficients for the last for the previous decoded mcu row.

            The caller is then expected to process first_row_upsample_dest before processing data
            in component.upsample_dest which stores the up-sampled components excluding the last row
            */

            // write cursor into component.upsample_dest
            let mut dest_start = 0;
            // number of values produced per up-sampled input row
            let stride_bytes_written = component.width_stride * component.sample_ratio.sample();

            if i > 0 {
                // Handle the last MCU of the previous row
                // This wasn't up-sampled as we didn't have the row_down
                // so we do it now

                let stride = component.width_stride;

                let dest = &mut component.first_row_upsample_dest[0..stride_bytes_written];

                // get current row
                let row = &component.row[..];
                let row_up = &component.row_up[..];
                // the first row of the newly decoded block acts as the row below
                let row_down = &component.raw_coeff[0..stride];
                (component.up_sampler)(row, row_up, row_down, upsampler_scratch_space, dest);
            }

            // we have the Y component width stride.
            // this may be higher than the actual width,(2x because vertical sampling)
            //
            // This will not upsample the last row

            // if false, do not upsample.
            // set to false on the last row of an mcu
            let mut upsample = true;

            // NOTE(review): row_up/row_down offsets below are computed with this `stride`
            // (width_stride * vertical_sample), while `pos` counts rows of `width_stride`
            // values; the two agree only when vertical_sample is 1 - TODO confirm.
            let stride = component.width_stride * component.vertical_sample;
            // number of width_stride-long rows in raw_coeff
            let stop_offset = component.raw_coeff.len() / component.width_stride;
            for (pos, curr_row) in component
                .raw_coeff
                .chunks_exact(component.width_stride)
                .enumerate()
            {
                let mut dest: &mut [i16] = &mut [];
                let mut row_up: &[i16] = &[];
                // row below current sample
                let mut row_down: &[i16] = &[];

                // Order of ifs matters

                if i == 0 && pos == 0 {
                    // first IMAGE row, row_up is the same as current row
                    // row_down is the row below.
                    row_up = &component.raw_coeff[pos * stride..(pos + 1) * stride];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if i > 0 && pos == 0 {
                    // first row of a new mcu, previous row was copied so use that
                    row_up = &component.row[..];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if i == mcu_height.saturating_sub(1) && pos == stop_offset - 1 {
                    // last IMAGE row, adjust pointer to use previous row and current row
                    row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    row_down = &component.raw_coeff[pos * stride..(pos + 1) * stride];
                } else if pos > 0 && pos < stop_offset - 1 {
                    // other rows, get row up and row down relative to our current row
                    // ignore last row of each mcu
                    row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if pos == stop_offset - 1 {
                    // last MCU in a row
                    //
                    // we need a row at the next MCU but we haven't decoded that MCU yet
                    // so we should save this and when we have the next MCU,
                    // do the upsampling

                    // store the current row and previous row in a buffer
                    let prev_row = &component.raw_coeff[(pos - 1) * stride..pos * stride];

                    component.row_up.copy_from_slice(prev_row);
                    component.row.copy_from_slice(curr_row);
                    upsample = false;
                } else {
                    unreachable!("Uh oh!");
                }
                if upsample {
                    // claim the next output slice and advance the write cursor
                    dest =
                        &mut component.upsample_dest[dest_start..dest_start + stride_bytes_written];
                    dest_start += stride_bytes_written;
                }

                if upsample {
                    // upsample
                    (component.up_sampler)(
                        curr_row,
                        row_up,
                        row_down,
                        upsampler_scratch_space,
                        dest
                    );
                }
            }
        }
        SampleRatios::H => {
            //assert_eq!(component.raw_coeff.len() * 2, component.upsample_dest.len());
            // Before it was an assert, but numerous and numerous and numerous
            // bug fixes and ad hoc solutions later, I have now just decided  to keep it as a resize
            component
                .upsample_dest
                .resize(component.raw_coeff.len() * 2, 0);

            let raw_coeff = &component.raw_coeff;
            let dest_coeff = &mut component.upsample_dest;

            if has_vertical_sample {
                /*
                There have been images that have the following configurations.

                Component ID:Y    HS:2 VS:2 QT:0
                Component ID:Cb   HS:1 VS:1 QT:1
                Component ID:Cr   HS:1 VS:2 QT:1

                This brings out a nasty case of misaligned sampling factors. Cr will need to save a row because
                of the way we process boundaries but Cb won't since Cr is horizontally sampled while Cb is
                HV sampled with respect to the image sampling factors.

                So during decoding of one MCU, we could only do 7 and not 8 rows, but the SampleRatio::H never had to
                save a single line, since it doesn't suffer from boundary issues.

                Now this takes care of that, saving the last MCU row in case it will be needed.
                We save the previous row before up-sampling this row because the boundary issue is in
                the last MCU row of the previous MCU.

                PS(cae): I can't add the image to the repo as it is nsfw, but can send if required
                */
                let length = component.first_row_upsample_dest.len();
                // rchunks_exact yields chunks from the end, so this copies the last
                // `length` values of dest_coeff before they are overwritten below
                component
                    .first_row_upsample_dest
                    .copy_from_slice(&dest_coeff.rchunks_exact(length).next().unwrap());
            }
            // up-sample each row
            for (single_row, output_stride) in raw_coeff
                .chunks_exact(component.width_stride)
                .zip(dest_coeff.chunks_exact_mut(component.width_stride * 2))
            {
                // upsample using the fn pointer, should only be H, so no need for
                // row up and row down
                (component.up_sampler)(single_row, &[], &[], &mut [], output_stride);
            }
        }
        SampleRatios::Generic(h, v) => {
            let raw_coeff = &component.raw_coeff;
            let dest_coeff = &mut component.upsample_dest;

            //let size =  component.width_stride.div_ceil(v);

            // for (single_row, output_stride) in raw_coeff
            //     .chunks_exact(size)
            //     .zip(dest_coeff.chunks_exact_mut(component.width_stride * h))
            // {
            //     (component.up_sampler)(single_row, &[], &[], &mut [], output_stride);
            //
            // }
            // each input row owns a slice of `v` output rows, each `width_stride * h` long
            for (single_row, output_stride) in raw_coeff
                .chunks_exact(component.width_stride)
                .zip(dest_coeff.chunks_exact_mut(component.width_stride * h * v))
            {
                // the same input row is up-sampled into every one of those output rows
                for row in output_stride.chunks_exact_mut(component.width_stride * h) {
                    (component.up_sampler)(single_row, &[], &[], &mut [], row);
                }
            }
        }
        // nothing to up-sample for this ratio
        SampleRatios::None => {}
    };
}

Coverage Report

Created: 2025-12-14 07:56

Line	Count	Source
1		/*
2		* Copyright (c) 2023.
3		*
4		* This software is free software;
5		*
6		* You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
7		*/
8
9		use alloc::format;
10		use core::convert::TryInto;
11
12		use zune_core::colorspace::ColorSpace;
13
14		use crate::color_convert::ycbcr_to_grayscale;
15		use crate::components::{Components, SampleRatios};
16		use crate::decoder::{ColorConvert16Ptr, MAX_COMPONENTS};
17		use crate::errors::DecodeErrors;
18
19		/// fast 0..255 * 0..255 => 0..255 rounded multiplication
20		///
21		/// Borrowed from stb
22		#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
23		#[inline]
24	0	fn blinn_8x8(in_val: u8, y: u8) -> u8 {
25	0	let t = i32::from(in_val) * i32::from(y) + 128;
26	0	return ((t + (t >> 8)) >> 8) as u8;
27	0	}
28
29		#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
30	0	pub(crate) fn color_convert(
31	0	unprocessed: &[&[i16]; MAX_COMPONENTS], color_convert_16: ColorConvert16Ptr,
32	0	input_colorspace: ColorSpace, output_colorspace: ColorSpace, output: &mut [u8], width: usize,
33	0	padded_width: usize
34	0	) -> Result<(), DecodeErrors> {
35	0	if input_colorspace.num_components() == 3 && input_colorspace == output_colorspace {
36		// sort things like RGB to RGB conversion
37	0	copy_removing_padding(unprocessed, width, padded_width, output);
38	0	return Ok(());
39	0	}
40	0	if input_colorspace.num_components() == 4 && input_colorspace == output_colorspace {
41	0	copy_removing_padding_4x(unprocessed, width, padded_width, output);
42	0	return Ok(());
43	0	}
44		// color convert
45	0	match (input_colorspace, output_colorspace) {
46	0	(ColorSpace::YCbCr \| ColorSpace::Luma, ColorSpace::Luma) => {
47	0	ycbcr_to_grayscale(unprocessed[0], width, padded_width, output);
48	0	}
49		(
50		ColorSpace::YCbCr,
51		ColorSpace::RGB \| ColorSpace::RGBA \| ColorSpace::BGR \| ColorSpace::BGRA
52	0	) => {
53	0	color_convert_ycbcr(
54	0	unprocessed,
55	0	width,
56	0	padded_width,
57	0	output_colorspace,
58	0	color_convert_16,
59	0	output
60	0	);
61	0	}
62	0	(ColorSpace::YCCK, ColorSpace::RGB) => {
63	0	color_convert_ycck_to_rgb::<3>(
64	0	unprocessed,
65	0	width,
66	0	padded_width,
67	0	output_colorspace,
68	0	color_convert_16,
69	0	output
70	0	);
71	0	}
72
73	0	(ColorSpace::YCCK, ColorSpace::RGBA) => {
74	0	color_convert_ycck_to_rgb::<4>(
75	0	unprocessed,
76	0	width,
77	0	padded_width,
78	0	output_colorspace,
79	0	color_convert_16,
80	0	output
81	0	);
82	0	}
83	0	(ColorSpace::CMYK, ColorSpace::RGB) => {
84	0	color_convert_cymk_to_rgb::<3>(unprocessed, width, padded_width, output);
85	0	}
86	0	(ColorSpace::CMYK, ColorSpace::RGBA) => {
87	0	color_convert_cymk_to_rgb::<4>(unprocessed, width, padded_width, output);
88	0	}
89	0	(ColorSpace::MultiBand(n), _) => {
90	0	if n.get() != 2 {
91	0	return Err(DecodeErrors::Format(format!(
92	0	"Unknown multiband sample ({n}), please share sample"
93	0	)));
94	0	}
95	0	copy_removing_padding_generic(
96	0	unprocessed,
97	0	width,
98	0	padded_width,
99	0	output,
100	0	n.get() as usize
101		);
102		}
103		(ColorSpace::Luma, ColorSpace::RGB) => {
104		// duplicate the luma channel three times to form RGB
105		// Note, this may assume the direct conversion
106		// from luma to RGB is by duplicating
107		//
108		// There may be a bit more complex ways
109		// of doing it but won't get onto it
110	0	convert_luma_to_rgb(unprocessed, width, padded_width, output)
111		}
112		(ColorSpace::Luma, ColorSpace::RGBA) => {
113		// duplicate the luma channel three times to form RGB
114		// add 255 as alpha
115		// Note, this may assume the direct conversion
116		// from luma to RGB is by duplicating
117		//
118		// There may be a bit more complex ways
119		// of doing it but won't get onto it
120	0	convert_luma_to_rgba(unprocessed, width, padded_width, output)
121		}
122
123		// For the other components we do nothing(currently)
124		_ => {
125	0	let msg = format!(
126	0	"Unimplemented colorspace mapping from {input_colorspace:?} to {output_colorspace:?}");
127
128	0	return Err(DecodeErrors::Format(msg));
129		}
130		}
131	0	Ok(())
132	0	}
133
134	0	fn convert_luma_to_rgb(
135	0	mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
136	0	) {
137	0	for (pix_w, y_w) in output
138	0	.chunks_exact_mut(width * 3)
139	0	.zip(mcu_block[0].chunks_exact(padded_width))
140		{
141	0	for (pix, c) in pix_w.chunks_exact_mut(3).zip(y_w) {
142	0	pix[0] = *c as u8;
143	0	pix[1] = *c as u8;
144	0	pix[2] = *c as u8;
145	0	}
146		}
147	0	}
148	0	fn convert_luma_to_rgba(
149	0	mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
150	0	) {
151	0	for (pix_w, y_w) in output
152	0	.chunks_exact_mut(width * 4)
153	0	.zip(mcu_block[0].chunks_exact(padded_width))
154		{
155	0	for (pix, c) in pix_w.chunks_exact_mut(4).zip(y_w) {
156	0	pix[0] = *c as u8;
157	0	pix[1] = *c as u8;
158	0	pix[2] = *c as u8;
159	0	pix[3] = 255;
160	0	}
161		}
162	0	}
163		/// Copy a block to output removing padding bytes from input
164		/// if necessary
165		#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
166	0	fn copy_removing_padding(
167	0	mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
168	0	) {
169	0	for (((pix_w, c_w), m_w), y_w) in output
170	0	.chunks_exact_mut(width * 3)
171	0	.zip(mcu_block[0].chunks_exact(padded_width))
172	0	.zip(mcu_block[1].chunks_exact(padded_width))
173	0	.zip(mcu_block[2].chunks_exact(padded_width))
174		{
175	0	for (((pix, c), y), m) in pix_w.chunks_exact_mut(3).zip(c_w).zip(m_w).zip(y_w) {
176	0	pix[0] = *c as u8;
177	0	pix[1] = *y as u8;
178	0	pix[2] = *m as u8;
179	0	}
180		}
181	0	}
182		#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
183	0	fn copy_removing_padding_4x(
184	0	mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
185	0	) {
186	0	for ((((pix_w, c_w), m_w), y_w), k_w) in output
187	0	.chunks_exact_mut(width * 4)
188	0	.zip(mcu_block[0].chunks_exact(padded_width))
189	0	.zip(mcu_block[1].chunks_exact(padded_width))
190	0	.zip(mcu_block[2].chunks_exact(padded_width))
191	0	.zip(mcu_block[3].chunks_exact(padded_width))
192		{
193	0	for ((((pix, c), y), m), k) in pix_w
194	0	.chunks_exact_mut(4)
195	0	.zip(c_w)
196	0	.zip(m_w)
197	0	.zip(y_w)
198	0	.zip(k_w)
199	0	{
200	0	pix[0] = *c as u8;
201	0	pix[1] = *y as u8;
202	0	pix[2] = *m as u8;
203	0	pix[3] = *k as u8;
204	0	}
205		}
206	0	}
207		#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
208	0	fn copy_removing_padding_generic(
209	0	mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8],
210	0	channels: usize
211	0	) {
212	0	match channels {
213		// just do 2 for now
214		2 => {
215	0	for ((pix_w, y_w), k_w) in output
216	0	.chunks_exact_mut(width * channels)
217	0	.zip(mcu_block[0].chunks_exact(padded_width))
218	0	.zip(mcu_block[1].chunks_exact(padded_width))
219		{
220	0	for ((pix, c), k) in pix_w.chunks_exact_mut(2).zip(y_w).zip(k_w) {
221	0	pix[0] = *c as u8;
222	0	pix[1] = *k as u8;
223	0	}
224		}
225		}
226	0	_ => unreachable!()
227		}
228	0	}
229		/// Convert YCCK image to rgb
230		#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
231	0	fn color_convert_ycck_to_rgb<const NUM_COMPONENTS: usize>(
232	0	mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize,
233	0	output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8]
234	0	) {
235	0	color_convert_ycbcr(
236	0	mcu_block,
237	0	width,
238	0	padded_width,
239	0	output_colorspace,
240	0	color_convert_16,
241	0	output
242		);
243	0	for (pix_w, m_w) in output
244	0	.chunks_exact_mut(width * 3)
245	0	.zip(mcu_block[3].chunks_exact(padded_width))
246		{
247	0	for (pix, m) in pix_w.chunks_exact_mut(NUM_COMPONENTS).zip(m_w) {
248	0	let m = (*m) as u8;
249	0	pix[0] = blinn_8x8(255 - pix[0], m);
250	0	pix[1] = blinn_8x8(255 - pix[1], m);
251	0	pix[2] = blinn_8x8(255 - pix[2], m);
252	0	}
253		}
254	0	} Unexecuted instantiation: zune_jpeg::worker::color_convert_ycck_to_rgb::<3> Unexecuted instantiation: zune_jpeg::worker::color_convert_ycck_to_rgb::<4>
255
256		#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
257	0	fn color_convert_cymk_to_rgb<const NUM_COMPONENTS: usize>(
258	0	mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
259	0	) {
260	0	for ((((pix_w, c_w), m_w), y_w), k_w) in output
261	0	.chunks_exact_mut(width * NUM_COMPONENTS)
262	0	.zip(mcu_block[0].chunks_exact(padded_width))
263	0	.zip(mcu_block[1].chunks_exact(padded_width))
264	0	.zip(mcu_block[2].chunks_exact(padded_width))
265	0	.zip(mcu_block[3].chunks_exact(padded_width))
266		{
267	0	for ((((pix, c), m), y), k) in pix_w
268	0	.chunks_exact_mut(3)
269	0	.zip(c_w)
270	0	.zip(m_w)
271	0	.zip(y_w)
272	0	.zip(k_w)
273	0	{
274	0	let c = *c as u8;
275	0	let m = *m as u8;
276	0	let y = *y as u8;
277	0	let k = *k as u8;
278	0
279	0	pix[0] = blinn_8x8(c, k);
280	0	pix[1] = blinn_8x8(m, k);
281	0	pix[2] = blinn_8x8(y, k);
282	0	}
283		}
284	0	} Unexecuted instantiation: zune_jpeg::worker::color_convert_cymk_to_rgb::<3> Unexecuted instantiation: zune_jpeg::worker::color_convert_cymk_to_rgb::<4>
285
286		/// Do color-conversion for interleaved MCU
287		#[allow(
288		clippy::similar_names,
289		clippy::too_many_arguments,
290		clippy::needless_pass_by_value,
291		clippy::unwrap_used
292		)]
293	0	fn color_convert_ycbcr(
294	0	mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize,
295	0	output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8]
296	0	) {
297	0	let num_components = output_colorspace.num_components();
298
299	0	let stride = width * num_components;
300		// Allocate temporary buffer for small widths less than 16.
301	0	let mut temp = [0; 64];
302		// We need to chunk per width to ensure we can discard extra values at the end of the width.
303		// Since the encoder may pad bits to ensure the width is a multiple of 8.
304	0	for (((y_width, cb_width), cr_width), out) in mcu_block[0]
305	0	.chunks_exact(padded_width)
306	0	.zip(mcu_block[1].chunks_exact(padded_width))
307	0	.zip(mcu_block[2].chunks_exact(padded_width))
308	0	.zip(output.chunks_exact_mut(stride))
309		{
310	0	if width < 16 {
311		// allocate temporary buffers for the values received from idct
312	0	let mut y_out = [0; 16];
313	0	let mut cb_out = [0; 16];
314	0	let mut cr_out = [0; 16];
315		// copy those small widths to that buffer
316	0	y_out[0..y_width.len()].copy_from_slice(y_width);
317	0	cb_out[0..cb_width.len()].copy_from_slice(cb_width);
318	0	cr_out[0..cr_width.len()].copy_from_slice(cr_width);
319		// we handle widths less than 16 a bit differently, allocating a temporary
320		// buffer and writing to that and then flushing to the out buffer
321		// because of the optimizations applied below,
322	0	(color_convert_16)(&y_out, &cb_out, &cr_out, &mut temp, &mut 0);
323		// copy to stride
324	0	out[0..width * num_components].copy_from_slice(&temp[0..width * num_components]);
325		// next
326	0	continue;
327	0	}
328
329		// Chunk in outputs of 16 to pass to color_convert as an array of 16 i16's.
330	0	for (((y, cb), cr), out_c) in y_width
331	0	.chunks_exact(16)
332	0	.zip(cb_width.chunks_exact(16))
333	0	.zip(cr_width.chunks_exact(16))
334	0	.zip(out.chunks_exact_mut(16 * num_components))
335	0	{
336	0	(color_convert_16)(
337	0	y.try_into().unwrap(),
338	0	cb.try_into().unwrap(),
339	0	cr.try_into().unwrap(),
340	0	out_c,
341	0	&mut 0
342	0	);
343	0	}
344		//we have more pixels in the end that can't be handled by the main loop.
345		//move pointer back a little bit to get last 16 bytes,
346		//color convert, and overwrite
347		//This means some values will be color converted twice.
348	0	for ((y, cb), cr) in y_width[width - 16..]
349	0	.chunks_exact(16)
350	0	.zip(cb_width[width - 16..].chunks_exact(16))
351	0	.zip(cr_width[width - 16..].chunks_exact(16))
352	0	.take(1)
353	0	{
354	0	(color_convert_16)(
355	0	y.try_into().unwrap(),
356	0	cb.try_into().unwrap(),
357	0	cr.try_into().unwrap(),
358	0	&mut temp,
359	0	&mut 0
360	0	);
361	0	}
362
363	0	let rem = out[(width - 16) * num_components..]
364	0	.chunks_exact_mut(16 * num_components)
365	0	.next()
366	0	.unwrap();
367
368	0	rem.copy_from_slice(&temp[0..rem.len()]);
369		}
370	0	}
371	0	pub(crate) fn upsample(
372	0	component: &mut Components, mcu_height: usize, i: usize, upsampler_scratch_space: &mut [i16],
373	0	has_vertical_sample: bool
374	0	) {
375	0	match component.sample_ratio {
376		SampleRatios::V \| SampleRatios::HV => {
377		/*
378		When upsampling vertically sampled images, we have a certain problem
379		which is that we do not have all MCU's decoded, this usually sucks at boundaries
380		e.g we can't upsample the last mcu row, since the row_down currently doesn't exist
381
382		To solve this we need to do two things
383
384		1. Carry over coefficients when we lack enough data to upsample
385		2. Upsample when we have enough data
386
387		To achieve (1), we store a previous row, and the current row in components themselves
388		which will later be used to make (2)
389
390		To achieve (2), we take the stored previous row(second last MCU row),
391		current row(last mcu row) and row down(first row of newly decoded MCU)
392
393		and upsample that and store it in first_row_upsample_dest, this contains
394		up-sampled coefficients for the last for the previous decoded mcu row.
395
396		The caller is then expected to process first_row_upsample_dest before processing data
397		in component.upsample_dest which stores the up-sampled components excluding the last row
398		*/
399
400	0	let mut dest_start = 0;
401	0	let stride_bytes_written = component.width_stride * component.sample_ratio.sample();
402
403	0	if i > 0 {
404	0	// Handle the last MCU of the previous row
405	0	// This wasn't up-sampled as we didn't have the row_down
406	0	// so we do it now
407	0
408	0	let stride = component.width_stride;
409	0
410	0	let dest = &mut component.first_row_upsample_dest[0..stride_bytes_written];
411	0
412	0	// get current row
413	0	let row = &component.row[..];
414	0	let row_up = &component.row_up[..];
415	0	let row_down = &component.raw_coeff[0..stride];
416	0	(component.up_sampler)(row, row_up, row_down, upsampler_scratch_space, dest);
417	0	}
418
419		// we have the Y component width stride.
420		// this may be higher than the actual width,(2x because vertical sampling)
421		//
422		// This will not upsample the last row
423
424		// if false, do not upsample.
425		// set to false on the last row of an mcu
426	0	let mut upsample = true;
427
428	0	let stride = component.width_stride * component.vertical_sample;
429	0	let stop_offset = component.raw_coeff.len() / component.width_stride;
430	0	for (pos, curr_row) in component
431	0	.raw_coeff
432	0	.chunks_exact(component.width_stride)
433	0	.enumerate()
434		{
435	0	let mut dest: &mut [i16] = &mut [];
436	0	let mut row_up: &[i16] = &[];
437		// row below current sample
438	0	let mut row_down: &[i16] = &[];
439
440		// Order of ifs matters
441
442	0	if i == 0 && pos == 0 {
443	0	// first IMAGE row, row_up is the same as current row
444	0	// row_down is the row below.
445	0	row_up = &component.raw_coeff[pos * stride..(pos + 1) * stride];
446	0	row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
447	0	} else if i > 0 && pos == 0 {
448	0	// first row of a new mcu, previous row was copied so use that
449	0	row_up = &component.row[..];
450	0	row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
451	0	} else if i == mcu_height.saturating_sub(1) && pos == stop_offset - 1 {
452	0	// last IMAGE row, adjust pointer to use previous row and current row
453	0	row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
454	0	row_down = &component.raw_coeff[pos * stride..(pos + 1) * stride];
455	0	} else if pos > 0 && pos < stop_offset - 1 {
456	0	// other rows, get row up and row down relative to our current row
457	0	// ignore last row of each mcu
458	0	row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
459	0	row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
460	0	} else if pos == stop_offset - 1 {
461	0	// last MCU in a row
462	0	//
463	0	// we need a row at the next MCU but we haven't decoded that MCU yet
464	0	// so we should save this and when we have the next MCU,
465	0	// do the upsampling
466	0
467	0	// store the current row and previous row in a buffer
468	0	let prev_row = &component.raw_coeff[(pos - 1) * stride..pos * stride];
469	0
470	0	component.row_up.copy_from_slice(prev_row);
471	0	component.row.copy_from_slice(curr_row);
472	0	upsample = false;
473	0	} else {
474	0	unreachable!("Uh oh!");
475		}
476	0	if upsample {
477	0	dest =
478	0	&mut component.upsample_dest[dest_start..dest_start + stride_bytes_written];
479	0	dest_start += stride_bytes_written;
480	0	}
481
482	0	if upsample {
483	0	// upsample
484	0	(component.up_sampler)(
485	0	curr_row,
486	0	row_up,
487	0	row_down,
488	0	upsampler_scratch_space,
489	0	dest
490	0	);
491	0	}
492		}
493		}
494		SampleRatios::H => {
495		//assert_eq!(component.raw_coeff.len() * 2, component.upsample_dest.len());
496		// Before it was an assert, but numerous and numerous and numerous
497		// bug fixes and ad hoc solutions later, I have now just decided to keep it as a resize
498	0	component
499	0	.upsample_dest
500	0	.resize(component.raw_coeff.len() * 2, 0);
501
502	0	let raw_coeff = &component.raw_coeff;
503	0	let dest_coeff = &mut component.upsample_dest;
504
505	0	if has_vertical_sample {
506	0	/*
507	0	There have been images that have the following configurations.
508	0
509	0	Component ID:Y HS:2 VS:2 QT:0
510	0	Component ID:Cb HS:1 VS:1 QT:1
511	0	Component ID:Cr HS:1 VS:2 QT:1
512	0
513	0	This brings out a nasty case of misaligned sampling factors. Cr will need to save a row because
514	0	of the way we process boundaries but Cb won't since Cr is horizontally sampled while Cb is
515	0	HV sampled with respect to the image sampling factors.
516	0
517	0	So during decoding of one MCU, we could only do 7 and not 8 rows, but the SampleRatio::H never had to
518	0	save a single line, since it doesn't suffer from boundary issues.
519	0
520	0	Now this takes care of that, saving the last MCU row in case it will be needed.
521	0	We save the previous row before up-sampling this row because the boundary issue is in
522	0	the last MCU row of the previous MCU.
523	0
524	0	PS(cae): I can't add the image to the repo as it is nsfw, but can send if required
525	0	*/
526	0	let length = component.first_row_upsample_dest.len();
527	0	component
528	0	.first_row_upsample_dest
529	0	.copy_from_slice(&dest_coeff.rchunks_exact(length).next().unwrap());
530	0	}
531		// up-sample each row
532	0	for (single_row, output_stride) in raw_coeff
533	0	.chunks_exact(component.width_stride)
534	0	.zip(dest_coeff.chunks_exact_mut(component.width_stride * 2))
535	0	{
536	0	// upsample using the fn pointer, should only be H, so no need for
537	0	// row up and row down
538	0	(component.up_sampler)(single_row, &[], &[], &mut [], output_stride);
539	0	}
540		}
541	0	SampleRatios::Generic(h, v) => {
542	0	let raw_coeff = &component.raw_coeff;
543	0	let dest_coeff = &mut component.upsample_dest;
544
545		//let size = component.width_stride.div_ceil(v);
546
547		// for (single_row, output_stride) in raw_coeff
548		// .chunks_exact(size)
549		// .zip(dest_coeff.chunks_exact_mut(component.width_stride * h))
550		// {
551		// (component.up_sampler)(single_row, &[], &[], &mut [], output_stride);
552		//
553		// }
554	0	for (single_row, output_stride) in raw_coeff
555	0	.chunks_exact(component.width_stride)
556	0	.zip(dest_coeff.chunks_exact_mut(component.width_stride * h * v))
557		{
558	0	for row in output_stride.chunks_exact_mut(component.width_stride * h) {
559	0	(component.up_sampler)(single_row, &[], &[], &mut [], row);
560	0	}
561		}
562		}
563	0	SampleRatios::None => {}
564		};
565	0	}