/src/libjxl/lib/jxl/dct_block-inl.h
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | // Adapters for DCT input/output: from/to contiguous blocks or image rows. |
7 | | |
8 | | #include "lib/jxl/base/compiler_specific.h" |
9 | | |
10 | | #if defined(LIB_JXL_DCT_BLOCK_INL_H_) == defined(HWY_TARGET_TOGGLE) |
11 | | #ifdef LIB_JXL_DCT_BLOCK_INL_H_ |
12 | | #undef LIB_JXL_DCT_BLOCK_INL_H_ |
13 | | #else |
14 | | #define LIB_JXL_DCT_BLOCK_INL_H_ |
15 | | #endif |
16 | | |
17 | | #include <cstddef> |
18 | | #include <hwy/highway.h> |
19 | | |
20 | | #include "lib/jxl/base/status.h" |
21 | | |
22 | | HWY_BEFORE_NAMESPACE(); |
23 | | namespace jxl { |
24 | | namespace HWY_NAMESPACE { |
25 | | namespace { |
26 | | |
27 | | // These templates are not found via ADL. |
28 | | using hwy::HWY_NAMESPACE::Vec; |
29 | | |
30 | | // Block: (x, y) <-> (N * y + x) |
31 | | // Lines: (x, y) <-> (stride * y + x) |
32 | | // |
33 | | // I.e. Block is a specialization of Lines with fixed stride. |
34 | | // |
35 | | // FromXXX should implement Read and Load (Read vector). |
36 | | // ToXXX should implement Write and Store (Write vector). |
37 | | |
38 | | template <size_t N> |
39 | | using BlockDesc = HWY_CAPPED(float, N); |
40 | | |
41 | | // Here and in the following, the SZ template parameter specifies the number of |
42 | | // values to load/store. Needed because we want to handle 4x4 sub-blocks of |
43 | | // 16x16 blocks. |
44 | | class DCTFrom { |
45 | | public: |
46 | 173M | DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {} Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Line | Count | Source | 46 | 2.19M | DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Line | Count | Source | 46 | 5.77M | DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Line | Count | Source | 46 | 158M | DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Line | Count | Source | 46 | 7.37M | DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) |
47 | | |
48 | | template <typename D> |
49 | 921M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { |
50 | 921M | JXL_DASSERT(Lanes(D()) <= stride_); |
51 | | // Since these functions are used also for DC, no alignment at all is |
52 | | // guaranteed in the case of floating blocks. |
53 | | // TODO(veluca): consider using a different class for DC-to-LF and |
54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. |
55 | 921M | return LoadU(D(), Address(row, i)); |
56 | 921M | } Unexecuted instantiation: enc_adaptive_quantization.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, unsigned long, unsigned long) const enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 2.28M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 2.28M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 2.28M | return LoadU(D(), Address(row, i)); | 56 | 2.28M | } |
enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 19.8M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 19.8M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 19.8M | return LoadU(D(), Address(row, i)); | 56 | 19.8M | } |
Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, unsigned long, unsigned long) const enc_chroma_from_luma.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 2.28M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 2.28M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 2.28M | return LoadU(D(), Address(row, i)); | 56 | 2.28M | } |
enc_chroma_from_luma.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 48.5M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 48.5M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 48.5M | return LoadU(D(), Address(row, i)); | 56 | 48.5M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, unsigned long, unsigned long) const enc_ac_strategy.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 152M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 152M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 152M | return LoadU(D(), Address(row, i)); | 56 | 152M | } |
enc_ac_strategy.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 671M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 671M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 671M | return LoadU(D(), Address(row, i)); | 56 | 671M | } |
Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, unsigned long, unsigned long) const dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 23.0M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 23.0M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 23.0M | return LoadU(D(), Address(row, i)); | 56 | 23.0M | } |
Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, unsigned long, unsigned long) const dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 326k | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 326k | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 326k | return LoadU(D(), Address(row, i)); | 56 | 326k | } |
dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 545k | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 545k | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 545k | return LoadU(D(), Address(row, i)); | 56 | 545k | } |
dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 378k | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 378k | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 378k | return LoadU(D(), Address(row, i)); | 56 | 378k | } |
Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const |
57 | | |
58 | 1.43G | HWY_INLINE float Read(const size_t row, const size_t i) const { |
59 | 1.43G | return *Address(row, i); |
60 | 1.43G | } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Line | Count | Source | 58 | 12.2M | HWY_INLINE float Read(const size_t row, const size_t i) const { | 59 | 12.2M | return *Address(row, i); | 60 | 12.2M | } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Line | Count | Source | 58 | 12.2M | HWY_INLINE float Read(const size_t row, const size_t i) const { | 59 | 12.2M | return *Address(row, i); | 60 | 12.2M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Line | Count | Source | 58 | 1.37G | HWY_INLINE float Read(const size_t row, const size_t i) const { | 59 | 1.37G | return *Address(row, i); | 60 | 1.37G | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Line | Count | Source | 58 | 37.0M | HWY_INLINE float Read(const size_t row, const size_t i) const { | 59 | 37.0M | return *Address(row, i); | 60 | 37.0M | } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const |
61 | | |
62 | | constexpr HWY_INLINE const float* Address(const size_t row, |
63 | 2.41G | const size_t i) const { |
64 | 2.41G | return data_ + row * stride_ + i; |
65 | 2.41G | } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Line | Count | Source | 63 | 34.6M | const size_t i) const { | 64 | 34.6M | return data_ + row * stride_ + i; | 65 | 34.6M | } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Line | Count | Source | 63 | 63.2M | const size_t i) const { | 64 | 63.2M | return data_ + row * stride_ + i; | 65 | 63.2M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Line | Count | Source | 63 | 2.25G | const size_t i) const { | 64 | 2.25G | return data_ + row * stride_ + i; | 65 | 2.25G | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Line | Count | Source | 63 | 65.9M | const size_t i) const { | 64 | 65.9M | return data_ + row * stride_ + i; | 65 | 65.9M | } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const |
66 | | |
67 | 58.9M | size_t Stride() const { return stride_; } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Stride() const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Stride() const Line | Count | Source | 67 | 195k | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Stride() const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Stride() const Line | Count | Source | 67 | 195k | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Stride() const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Stride() const Line | Count | Source | 67 | 53.8M | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Stride() const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Stride() const Line | Count | Source | 67 | 4.65M | size_t Stride() const { return stride_; } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Stride() const |
68 | | |
69 | | private: |
70 | | size_t stride_; |
71 | | const float* JXL_RESTRICT data_; |
72 | | }; |
73 | | |
74 | | class DCTTo { |
75 | | public: |
76 | 173M | DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {} Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Line | Count | Source | 76 | 2.19M | DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Line | Count | Source | 76 | 5.77M | DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Line | Count | Source | 76 | 158M | DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Line | Count | Source | 76 | 7.37M | DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) |
77 | | |
78 | | template <typename D> |
79 | | HWY_INLINE void StorePart(D /* tag */, const Vec<D>& v, const size_t row, |
80 | 921M | size_t i) const { |
81 | 921M | JXL_DASSERT(Lanes(D()) <= stride_); |
82 | | // Since these functions are used also for DC, no alignment at all is |
83 | | // guaranteed in the case of floating blocks. |
84 | | // TODO(veluca): consider using a different class for DC-to-LF and |
85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. |
86 | 921M | StoreU(v, D(), Address(row, i)); |
87 | 921M | } Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 2.28M | size_t i) const { | 81 | 2.28M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 2.28M | StoreU(v, D(), Address(row, i)); | 87 | 2.28M | } |
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 19.8M | size_t i) const { | 81 | 19.8M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 19.8M | StoreU(v, D(), Address(row, i)); | 87 | 19.8M | } |
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 2.28M | size_t i) const { | 81 | 2.28M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 2.28M | StoreU(v, D(), Address(row, i)); | 87 | 2.28M | } |
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 48.5M | size_t i) const { | 81 | 48.5M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 48.5M | StoreU(v, D(), Address(row, i)); | 87 | 48.5M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 152M | size_t i) const { | 81 | 152M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 152M | StoreU(v, D(), Address(row, i)); | 87 | 152M | } |
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 671M | size_t i) const { | 81 | 671M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 671M | StoreU(v, D(), Address(row, i)); | 87 | 671M | } |
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 23.0M | size_t i) const { | 81 | 23.0M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 23.0M | StoreU(v, D(), Address(row, i)); | 87 | 23.0M | } |
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 326k | size_t i) const { | 81 | 326k | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 326k | StoreU(v, D(), Address(row, i)); | 87 | 326k | } |
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 545k | size_t i) const { | 81 | 545k | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 545k | StoreU(v, D(), Address(row, i)); | 87 | 545k | } |
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 378k | size_t i) const { | 81 | 378k | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 378k | StoreU(v, D(), Address(row, i)); | 87 | 378k | } |
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const |
88 | | |
89 | 1.43G | HWY_INLINE void Write(float v, const size_t row, const size_t i) const { |
90 | 1.43G | *Address(row, i) = v; |
91 | 1.43G | } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Line | Count | Source | 89 | 12.2M | HWY_INLINE void Write(float v, const size_t row, const size_t i) const { | 90 | 12.2M | *Address(row, i) = v; | 91 | 12.2M | } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Line | Count | Source | 89 | 12.2M | HWY_INLINE void Write(float v, const size_t row, const size_t i) const { | 90 | 12.2M | *Address(row, i) = v; | 91 | 12.2M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Line | Count | Source | 89 | 1.37G | HWY_INLINE void Write(float v, const size_t row, const size_t i) const { | 90 | 1.37G | *Address(row, i) = v; | 91 | 1.37G | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Line | Count | Source | 89 | 37.0M | HWY_INLINE void Write(float v, const size_t row, const size_t i) const { | 90 | 37.0M | *Address(row, i) = v; | 91 | 37.0M | } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const |
92 | | |
93 | 2.41G | constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const { |
94 | 2.41G | return data_ + row * stride_ + i; |
95 | 2.41G | } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Line | Count | Source | 93 | 34.6M | constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const { | 94 | 34.6M | return data_ + row * stride_ + i; | 95 | 34.6M | } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Line | Count | Source | 93 | 63.2M | constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const { | 94 | 63.2M | return data_ + row * stride_ + i; | 95 | 63.2M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Line | Count | Source | 93 | 2.25G | constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const { | 94 | 2.25G | return data_ + row * stride_ + i; | 95 | 2.25G | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Line | Count | Source | 93 | 65.9M | constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const { | 94 | 65.9M | return data_ + row * stride_ + i; | 95 | 65.9M | } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const |
96 | | |
97 | 58.9M | size_t Stride() const { return stride_; } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Stride() const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Stride() const Line | Count | Source | 97 | 195k | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Stride() const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Stride() const Line | Count | Source | 97 | 195k | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Stride() const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Stride() const Line | Count | Source | 97 | 53.8M | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Stride() const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Stride() const Line | Count | Source | 97 | 4.65M | size_t Stride() const { return stride_; } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Stride() const |
98 | | |
99 | | private: |
100 | | size_t stride_; |
101 | | float* JXL_RESTRICT data_; |
102 | | }; |
103 | | |
104 | | } // namespace |
105 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
106 | | } // namespace HWY_NAMESPACE |
107 | | } // namespace jxl |
108 | | HWY_AFTER_NAMESPACE(); |
109 | | |
110 | | #endif // LIB_JXL_DCT_BLOCK_INL_H_ |