/src/libjxl/lib/jxl/dct_block-inl.h
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | // Adapters for DCT input/output: from/to contiguous blocks or image rows. |
7 | | |
8 | | #include "lib/jxl/base/compiler_specific.h" |
9 | | |
10 | | #if defined(LIB_JXL_DCT_BLOCK_INL_H_) == defined(HWY_TARGET_TOGGLE) |
11 | | #ifdef LIB_JXL_DCT_BLOCK_INL_H_ |
12 | | #undef LIB_JXL_DCT_BLOCK_INL_H_ |
13 | | #else |
14 | | #define LIB_JXL_DCT_BLOCK_INL_H_ |
15 | | #endif |
16 | | |
17 | | #include <cstddef> |
18 | | #include <hwy/highway.h> |
19 | | |
20 | | #include "lib/jxl/base/status.h" |
21 | | |
22 | | HWY_BEFORE_NAMESPACE(); |
23 | | namespace jxl { |
24 | | namespace HWY_NAMESPACE { |
25 | | namespace { |
26 | | |
27 | | // These templates are not found via ADL. |
28 | | using hwy::HWY_NAMESPACE::Vec; |
29 | | |
30 | | // Block: (x, y) <-> (N * y + x) |
31 | | // Lines: (x, y) <-> (stride * y + x) |
32 | | // |
33 | | // I.e. Block is a specialization of Lines with fixed stride. |
34 | | // |
35 | | // FromXXX should implement Read and Load (Read vector). |
36 | | // ToXXX should implement Write and Store (Write vector). |
37 | | |
38 | | template <size_t N> |
39 | | using BlockDesc = HWY_CAPPED(float, N); |
40 | | |
41 | | // Here and in the following, the SZ template parameter specifies the number of |
42 | | // values to load/store. Needed because we want to handle 4x4 sub-blocks of |
43 | | // 16x16 blocks. |
44 | | class DCTFrom { |
45 | | public: |
46 | 151M | DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {} Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Line | Count | Source | 46 | 1.76M | DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Line | Count | Source | 46 | 4.94M | DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Line | Count | Source | 46 | 138M | DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Line | Count | Source | 46 | 6.01M | DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::DCTFrom(float const*, unsigned long) |
47 | | |
48 | | template <typename D> |
49 | 787M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { |
50 | 787M | JXL_DASSERT(Lanes(D()) <= stride_); |
51 | | // Since these functions are used also for DC, no alignment at all is |
52 | | // guaranteed in the case of floating blocks. |
53 | | // TODO(veluca): consider using a different class for DC-to-LF and |
54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. |
55 | 787M | return LoadU(D(), Address(row, i)); |
56 | 787M | } Unexecuted instantiation: enc_adaptive_quantization.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, unsigned long, unsigned long) const enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 1.56M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 1.56M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 1.56M | return LoadU(D(), Address(row, i)); | 56 | 1.56M | } |
enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 19.9M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 19.9M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 19.9M | return LoadU(D(), Address(row, i)); | 56 | 19.9M | } |
Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, unsigned long, unsigned long) const enc_chroma_from_luma.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 1.56M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 1.56M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 1.56M | return LoadU(D(), Address(row, i)); | 56 | 1.56M | } |
enc_chroma_from_luma.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 45.4M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 45.4M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 45.4M | return LoadU(D(), Address(row, i)); | 56 | 45.4M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, unsigned long, unsigned long) const enc_ac_strategy.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 135M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 135M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 135M | return LoadU(D(), Address(row, i)); | 56 | 135M | } |
enc_ac_strategy.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 558M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 558M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 558M | return LoadU(D(), Address(row, i)); | 56 | 558M | } |
Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, unsigned long, unsigned long) const dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 23.0M | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 23.0M | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 23.0M | return LoadU(D(), Address(row, i)); | 56 | 23.0M | } |
Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) jxl::N_SSE4::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, unsigned long, unsigned long) const dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 267k | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 267k | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 267k | return LoadU(D(), Address(row, i)); | 56 | 267k | } |
dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 505k | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 505k | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 505k | return LoadU(D(), Address(row, i)); | 56 | 505k | } |
dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) jxl::N_AVX2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, unsigned long, unsigned long) const Line | Count | Source | 49 | 466k | HWY_INLINE Vec<D> LoadPart(D /* tag */, const size_t row, size_t i) const { | 50 | 466k | JXL_DASSERT(Lanes(D()) <= stride_); | 51 | | // Since these functions are used also for DC, no alignment at all is | 52 | | // guaranteed in the case of floating blocks. | 53 | | // TODO(veluca): consider using a different class for DC-to-LF and | 54 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 55 | 466k | return LoadU(D(), Address(row, i)); | 56 | 466k | } |
Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) jxl::N_SSE2::(anonymous namespace)::DCTFrom::LoadPart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, unsigned long, unsigned long) const |
57 | | |
58 | 1.26G | HWY_INLINE float Read(const size_t row, const size_t i) const { |
59 | 1.26G | return *Address(row, i); |
60 | 1.26G | } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Line | Count | Source | 58 | 8.78M | HWY_INLINE float Read(const size_t row, const size_t i) const { | 59 | 8.78M | return *Address(row, i); | 60 | 8.78M | } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Line | Count | Source | 58 | 8.78M | HWY_INLINE float Read(const size_t row, const size_t i) const { | 59 | 8.78M | return *Address(row, i); | 60 | 8.78M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Line | Count | Source | 58 | 1.22G | HWY_INLINE float Read(const size_t row, const size_t i) const { | 59 | 1.22G | return *Address(row, i); | 60 | 1.22G | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Line | Count | Source | 58 | 26.8M | HWY_INLINE float Read(const size_t row, const size_t i) const { | 59 | 26.8M | return *Address(row, i); | 60 | 26.8M | } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Read(unsigned long, unsigned long) const |
61 | | |
62 | | constexpr HWY_INLINE const float* Address(const size_t row, |
63 | 2.10G | const size_t i) const { |
64 | 2.10G | return data_ + row * stride_ + i; |
65 | 2.10G | } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Line | Count | Source | 63 | 30.5M | const size_t i) const { | 64 | 30.5M | return data_ + row * stride_ + i; | 65 | 30.5M | } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Line | Count | Source | 63 | 55.9M | const size_t i) const { | 64 | 55.9M | return data_ + row * stride_ + i; | 65 | 55.9M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Line | Count | Source | 63 | 1.96G | const size_t i) const { | 64 | 1.96G | return data_ + row * stride_ + i; | 65 | 1.96G | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const Line | Count | Source | 63 | 55.0M | const size_t i) const { | 64 | 55.0M | return data_ + row * stride_ + i; | 65 | 55.0M | } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Address(unsigned long, unsigned long) const |
66 | | |
67 | 51.0M | size_t Stride() const { return stride_; } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Stride() const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Stride() const Line | Count | Source | 67 | 180k | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Stride() const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Stride() const Line | Count | Source | 67 | 180k | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Stride() const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Stride() const Line | Count | Source | 67 | 46.7M | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Stride() const Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTFrom::Stride() const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTFrom::Stride() const Line | Count | Source | 67 | 3.90M | size_t Stride() const { return stride_; } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTFrom::Stride() const |
68 | | |
69 | | private: |
70 | | size_t stride_; |
71 | | const float* JXL_RESTRICT data_; |
72 | | }; |
73 | | |
74 | | class DCTTo { |
75 | | public: |
76 | 151M | DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {} Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Line | Count | Source | 76 | 1.76M | DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Line | Count | Source | 76 | 4.94M | DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Line | Count | Source | 76 | 138M | DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Line | Count | Source | 76 | 6.01M | DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {} |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::DCTTo(float*, unsigned long) |
77 | | |
78 | | template <typename D> |
79 | | HWY_INLINE void StorePart(D /* tag */, const Vec<D>& v, const size_t row, |
80 | 787M | size_t i) const { |
81 | 787M | JXL_DASSERT(Lanes(D()) <= stride_); |
82 | | // Since these functions are used also for DC, no alignment at all is |
83 | | // guaranteed in the case of floating blocks. |
84 | | // TODO(veluca): consider using a different class for DC-to-LF and |
85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. |
86 | 787M | StoreU(v, D(), Address(row, i)); |
87 | 787M | } Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 1.56M | size_t i) const { | 81 | 1.56M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 1.56M | StoreU(v, D(), Address(row, i)); | 87 | 1.56M | } |
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 19.9M | size_t i) const { | 81 | 19.9M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 19.9M | StoreU(v, D(), Address(row, i)); | 87 | 19.9M | } |
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 1.56M | size_t i) const { | 81 | 1.56M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 1.56M | StoreU(v, D(), Address(row, i)); | 87 | 1.56M | } |
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 45.4M | size_t i) const { | 81 | 45.4M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 45.4M | StoreU(v, D(), Address(row, i)); | 87 | 45.4M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 135M | size_t i) const { | 81 | 135M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 135M | StoreU(v, D(), Address(row, i)); | 87 | 135M | } |
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 558M | size_t i) const { | 81 | 558M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 558M | StoreU(v, D(), Address(row, i)); | 87 | 558M | } |
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 23.0M | size_t i) const { | 81 | 23.0M | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 23.0M | StoreU(v, D(), Address(row, i)); | 87 | 23.0M | } |
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE4::Simd<float, 2ul, 0> >(hwy::N_SSE4::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE4::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 267k | size_t i) const { | 81 | 267k | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 267k | StoreU(v, D(), Address(row, i)); | 87 | 267k | } |
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 2ul, 0> >(hwy::N_AVX2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 505k | size_t i) const { | 81 | 505k | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 505k | StoreU(v, D(), Address(row, i)); | 87 | 505k | } |
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCTTo::StorePart<hwy::N_AVX2::Simd<float, 4ul, 0> >(hwy::N_AVX2::Simd<float, 4ul, 0>, decltype (Zero((hwy::N_AVX2::Simd<float, 4ul, 0>)())) const&, unsigned long, unsigned long) const Line | Count | Source | 80 | 466k | size_t i) const { | 81 | 466k | JXL_DASSERT(Lanes(D()) <= stride_); | 82 | | // Since these functions are used also for DC, no alignment at all is | 83 | | // guaranteed in the case of floating blocks. | 84 | | // TODO(veluca): consider using a different class for DC-to-LF and | 85 | | // DC-from-LF, or copying DC values to/from a temporary aligned location. | 86 | 466k | StoreU(v, D(), Address(row, i)); | 87 | 466k | } |
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) const&, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCTTo::StorePart<hwy::N_SSE2::Simd<float, 2ul, 0> >(hwy::N_SSE2::Simd<float, 2ul, 0>, decltype (Zero((hwy::N_SSE2::Simd<float, 2ul, 0>)())) const&, unsigned long, unsigned long) const |
88 | | |
89 | 1.26G | HWY_INLINE void Write(float v, const size_t row, const size_t i) const { |
90 | 1.26G | *Address(row, i) = v; |
91 | 1.26G | } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Line | Count | Source | 89 | 8.78M | HWY_INLINE void Write(float v, const size_t row, const size_t i) const { | 90 | 8.78M | *Address(row, i) = v; | 91 | 8.78M | } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Line | Count | Source | 89 | 8.78M | HWY_INLINE void Write(float v, const size_t row, const size_t i) const { | 90 | 8.78M | *Address(row, i) = v; | 91 | 8.78M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Line | Count | Source | 89 | 1.22G | HWY_INLINE void Write(float v, const size_t row, const size_t i) const { | 90 | 1.22G | *Address(row, i) = v; | 91 | 1.22G | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Line | Count | Source | 89 | 26.8M | HWY_INLINE void Write(float v, const size_t row, const size_t i) const { | 90 | 26.8M | *Address(row, i) = v; | 91 | 26.8M | } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Write(float, unsigned long, unsigned long) const |
92 | | |
93 | 2.10G | constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const { |
94 | 2.10G | return data_ + row * stride_ + i; |
95 | 2.10G | } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Line | Count | Source | 93 | 30.5M | constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const { | 94 | 30.5M | return data_ + row * stride_ + i; | 95 | 30.5M | } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Line | Count | Source | 93 | 55.9M | constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const { | 94 | 55.9M | return data_ + row * stride_ + i; | 95 | 55.9M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Line | Count | Source | 93 | 1.96G | constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const { | 94 | 1.96G | return data_ + row * stride_ + i; | 95 | 1.96G | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const Line | Count | Source | 93 | 55.0M | constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const { | 94 | 55.0M | return data_ + row * stride_ + i; | 95 | 55.0M | } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Address(unsigned long, unsigned long) const |
96 | | |
97 | 51.0M | size_t Stride() const { return stride_; } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Stride() const enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Stride() const Line | Count | Source | 97 | 180k | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Stride() const enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Stride() const Line | Count | Source | 97 | 180k | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Stride() const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Stride() const Line | Count | Source | 97 | 46.7M | size_t Stride() const { return stride_; } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Stride() const Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::DCTTo::Stride() const dec_group.cc:jxl::N_AVX2::(anonymous namespace)::DCTTo::Stride() const Line | Count | Source | 97 | 3.90M | size_t Stride() const { return stride_; } |
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::DCTTo::Stride() const |
98 | | |
99 | | private: |
100 | | size_t stride_; |
101 | | float* JXL_RESTRICT data_; |
102 | | }; |
103 | | |
104 | | } // namespace |
105 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
106 | | } // namespace HWY_NAMESPACE |
107 | | } // namespace jxl |
108 | | HWY_AFTER_NAMESPACE(); |
109 | | |
110 | | #endif // LIB_JXL_DCT_BLOCK_INL_H_ |