/src/libjxl/lib/jxl/dct_block-inl.h
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | // Adapters for DCT input/output: from/to contiguous blocks or image rows. |
7 | | |
8 | | #include "lib/jxl/base/compiler_specific.h" |
9 | | |
10 | | #if defined(LIB_JXL_DCT_BLOCK_INL_H_) == defined(HWY_TARGET_TOGGLE) |
11 | | #ifdef LIB_JXL_DCT_BLOCK_INL_H_ |
12 | | #undef LIB_JXL_DCT_BLOCK_INL_H_ |
13 | | #else |
14 | | #define LIB_JXL_DCT_BLOCK_INL_H_ |
15 | | #endif |
16 | | |
17 | | #include <cstddef> |
18 | | #include <hwy/highway.h> |
19 | | |
20 | | #include "lib/jxl/base/status.h" |
21 | | |
22 | | HWY_BEFORE_NAMESPACE(); |
23 | | namespace jxl { |
24 | | namespace HWY_NAMESPACE { |
25 | | namespace { |
26 | | |
27 | | // These templates are not found via ADL. |
28 | | using hwy::HWY_NAMESPACE::Vec; |
29 | | |
30 | | // Block: (x, y) <-> (N * y + x) |
31 | | // Lines: (x, y) <-> (stride * y + x) |
32 | | // |
33 | | // I.e. Block is a specialization of Lines with fixed stride. |
34 | | // |
35 | | // FromXXX should implement Read and Load (Read vector). |
36 | | // ToXXX should implement Write and Store (Write vector). |
37 | | |
38 | | template <size_t N> |
39 | | using BlockDesc = HWY_CAPPED(float, N); |
40 | | |
41 | | // Here and in the following, the SZ template parameter specifies the number of |
42 | | // values to load/store. Needed because we want to handle 4x4 sub-blocks of |
43 | | // 16x16 blocks. |
44 | | class DCTFrom { |
45 | | public: |
46 | 1.95G | DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {}Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Line | Count | Source | 46 | 16.0M | DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Line | Count | Source | 46 | 57.8M | DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Line | Count | Source | 46 | 1.82G | DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Line | Count | Source | 46 | 49.5M | DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) |
47 | | |
48 | | template <typename D> |
49 | 10.0G | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { |
50 | 10.0G | JXL_DASSERT(Lanes(D()) <= stride_); |
51 | | // Since these functions are used also for DC, no alignment at all is |
52 | | // guaranteed in the case of floating blocks. |
53 | | // TODO(veluca): consider using a different class for DC-to-LF and |
54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. |
55 | 10.0G | return LoadU(D(), Address(row, i)); |
56 | 10.0G | } Unexecuted instantiation: enc_adaptive_quantization.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, unsigned long, unsigned long) const enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 9.65M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 9.65M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 9.65M | return LoadU(D(), Address(row, i)); | 56 | 9.65M | } |
enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 260M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 260M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 260M | return LoadU(D(), Address(row, i)); | 56 | 260M | } |
Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, unsigned long, unsigned long) const enc_chroma_from_luma.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 9.65M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 9.65M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 9.65M | return LoadU(D(), Address(row, i)); | 56 | 9.65M | } |
enc_chroma_from_luma.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 594M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 594M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 594M | return LoadU(D(), Address(row, i)); | 56 | 594M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, unsigned long, unsigned long) const enc_ac_strategy.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 1.78G | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 1.78G | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 1.78G | return LoadU(D(), Address(row, i)); | 56 | 1.78G | } |
enc_ac_strategy.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 7.11G | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 7.11G | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 7.11G | return LoadU(D(), Address(row, i)); | 56 | 7.11G | } |
Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, unsigned long, unsigned long) const dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 283M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 283M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 283M | return LoadU(D(), Address(row, i)); | 56 | 283M | } |
Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, unsigned long, unsigned long) const dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 3.23M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 3.23M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 3.23M | return LoadU(D(), Address(row, i)); | 56 | 3.23M | } |
dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 5.18M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 5.18M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 5.18M | return LoadU(D(), Address(row, i)); | 56 | 5.18M | } |
dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 3.62M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 3.62M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 3.62M | return LoadU(D(), Address(row, i)); | 56 | 3.62M | } |
Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const |
57 | | |
58 | 16.3G | HWY_INLINE float Read(const size_t row, const size_t i) const { |
59 | 16.3G | return *Address(row, i); |
60 | 16.3G | } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Line | Count | Source | 58 | 55.3M | HWY_INLINE float Read(const size_t row, const size_t i) const { | 59 | 55.3M | return *Address(row, i); | 60 | 55.3M | } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Line | Count | Source | 58 | 55.3M | HWY_INLINE float Read(const size_t row, const size_t i) const { | 59 | 55.3M | return *Address(row, i); | 60 | 55.3M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Line | Count | Source | 58 | 16.0G | HWY_INLINE float Read(const size_t row, const size_t i) const { | 59 | 16.0G | return *Address(row, i); | 60 | 16.0G | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Line | Count | Source | 58 | 167M | HWY_INLINE float Read(const size_t row, const size_t i) const { | 59 | 167M | return *Address(row, i); | 60 | 167M | } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const |
61 | | |
62 | | constexpr HWY_INLINE const float* Address(const size_t row, |
63 | 27.0G | const size_t i) const { |
64 | 27.0G | return data_ + row * stride_ + i; |
65 | 27.0G | } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Line | Count | Source | 63 | 327M | const size_t i) const { | 64 | 327M | return data_ + row * stride_ + i; | 65 | 327M | } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Line | Count | Source | 63 | 661M | const size_t i) const { | 64 | 661M | return data_ + row * stride_ + i; | 65 | 661M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Line | Count | Source | 63 | 25.5G | const size_t i) const { | 64 | 25.5G | return data_ + row * stride_ + i; | 65 | 25.5G | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Line | Count | Source | 63 | 496M | const size_t i) const { | 64 | 496M | return data_ + row * stride_ + i; | 65 | 496M | } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const |
66 | | |
67 | 653M | size_t Stride() const { return stride_; }Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Stride() const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Stride() const Line | Count | Source | 67 | 2.01M | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Stride() const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Stride() const Line | Count | Source | 67 | 2.01M | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Stride() const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Stride() const Line | Count | Source | 67 | 615M | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Stride() const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Stride() const Line | Count | Source | 67 | 33.4M | size_t Stride() const { return stride_; } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Stride() const |
68 | | |
69 | | private: |
70 | | size_t stride_; |
71 | | const float* JXL_RESTRICT data_; |
72 | | }; |
73 | | |
74 | | class DCTTo { |
75 | | public: |
76 | 1.95G | DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {}Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Line | Count | Source | 76 | 16.0M | DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Line | Count | Source | 76 | 57.8M | DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Line | Count | Source | 76 | 1.82G | DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Line | Count | Source | 76 | 49.5M | DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) |
77 | | |
78 | | template <typename D> |
79 | | HWY_INLINE void StorePart(D /* tag */, const Vec<D>& v, const size_t row, |
80 | 10.0G | size_t i) const { |
81 | 10.0G | JXL_DASSERT(Lanes(D()) <= stride_); |
82 | | // Since these functions are used also for DC, no alignment at all is |
83 | | // guaranteed in the case of floating blocks. |
84 | | // TODO(veluca): consider using a different class for DC-to-LF and |
85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. |
86 | 10.0G | StoreU(v, D(), Address(row, i)); |
87 | 10.0G | } Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 9.65M | size_t i) const { | 81 | 9.65M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 9.65M | StoreU(v, D(), Address(row, i)); | 87 | 9.65M | } |
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 260M | size_t i) const { | 81 | 260M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 260M | StoreU(v, D(), Address(row, i)); | 87 | 260M | } |
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 9.65M | size_t i) const { | 81 | 9.65M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 9.65M | StoreU(v, D(), Address(row, i)); | 87 | 9.65M | } |
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 594M | size_t i) const { | 81 | 594M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 594M | StoreU(v, D(), Address(row, i)); | 87 | 594M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 1.78G | size_t i) const { | 81 | 1.78G | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 1.78G | StoreU(v, D(), Address(row, i)); | 87 | 1.78G | } |
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 7.11G | size_t i) const { | 81 | 7.11G | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 7.11G | StoreU(v, D(), Address(row, i)); | 87 | 7.11G | } |
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 283M | size_t i) const { | 81 | 283M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 283M | StoreU(v, D(), Address(row, i)); | 87 | 283M | } |
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 3.23M | size_t i) const { | 81 | 3.23M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 3.23M | StoreU(v, D(), Address(row, i)); | 87 | 3.23M | } |
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 5.18M | size_t i) const { | 81 | 5.18M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 5.18M | StoreU(v, D(), Address(row, i)); | 87 | 5.18M | } |
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 3.62M | size_t i) const { | 81 | 3.62M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 3.62M | StoreU(v, D(), Address(row, i)); | 87 | 3.62M | } |
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const |
88 | | |
89 | 16.3G | HWY_INLINE void Write(float v, const size_t row, const size_t i) const { |
90 | 16.3G | *Address(row, i) = v; |
91 | 16.3G | } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Line | Count | Source | 89 | 55.3M | HWY_INLINE void Write(float v, const size_t row, const size_t i) const { | 90 | 55.3M | *Address(row, i) = v; | 91 | 55.3M | } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Line | Count | Source | 89 | 55.3M | HWY_INLINE void Write(float v, const size_t row, const size_t i) const { | 90 | 55.3M | *Address(row, i) = v; | 91 | 55.3M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Line | Count | Source | 89 | 16.0G | HWY_INLINE void Write(float v, const size_t row, const size_t i) const { | 90 | 16.0G | *Address(row, i) = v; | 91 | 16.0G | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Line | Count | Source | 89 | 167M | HWY_INLINE void Write(float v, const size_t row, const size_t i) const { | 90 | 167M | *Address(row, i) = v; | 91 | 167M | } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const |
92 | | |
93 | 27.0G | constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const { |
94 | 27.0G | return data_ + row * stride_ + i; |
95 | 27.0G | } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Line | Count | Source | 93 | 327M | constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const { | 94 | 327M | return data_ + row * stride_ + i; | 95 | 327M | } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Line | Count | Source | 93 | 661M | constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const { | 94 | 661M | return data_ + row * stride_ + i; | 95 | 661M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Line | Count | Source | 93 | 25.5G | constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const { | 94 | 25.5G | return data_ + row * stride_ + i; | 95 | 25.5G | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Line | Count | Source | 93 | 496M | constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const { | 94 | 496M | return data_ + row * stride_ + i; | 95 | 496M | } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const |
96 | | |
97 | 653M | size_t Stride() const { return stride_; }Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Stride() const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Stride() const Line | Count | Source | 97 | 2.01M | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Stride() const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Stride() const Line | Count | Source | 97 | 2.01M | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Stride() const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Stride() const Line | Count | Source | 97 | 615M | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Stride() const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Stride() const Line | Count | Source | 97 | 33.4M | size_t Stride() const { return stride_; } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Stride() const |
98 | | |
99 | | private: |
100 | | size_t stride_; |
101 | | float* JXL_RESTRICT data_; |
102 | | }; |
103 | | |
104 | | } // namespace |
105 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
106 | | } // namespace HWY_NAMESPACE |
107 | | } // namespace jxl |
108 | | HWY_AFTER_NAMESPACE(); |
109 | | |
110 | | #endif // LIB_JXL_DCT_BLOCK_INL_H_ |