/src/libjxl/lib/jxl/dec_transforms-inl.h
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include <cstring> |
7 | | |
8 | | #include "lib/jxl/base/compiler_specific.h" |
9 | | #include "lib/jxl/frame_dimensions.h" |
10 | | |
11 | | #if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE) |
12 | | #ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_ |
13 | | #undef LIB_JXL_DEC_TRANSFORMS_INL_H_ |
14 | | #else |
15 | | #define LIB_JXL_DEC_TRANSFORMS_INL_H_ |
16 | | #endif |
17 | | |
18 | | #include <cstddef> |
19 | | #include <hwy/highway.h> |
20 | | |
21 | | #include "lib/jxl/ac_strategy.h" |
22 | | #include "lib/jxl/dct-inl.h" |
23 | | #include "lib/jxl/dct_scales.h" |
24 | | HWY_BEFORE_NAMESPACE(); |
25 | | namespace jxl { |
26 | | namespace HWY_NAMESPACE { |
27 | | namespace { |
28 | | |
29 | | // These templates are not found via ADL. |
30 | | using hwy::HWY_NAMESPACE::MulAdd; |
31 | | |
32 | | // Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which |
33 | | // is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the |
34 | | // input block. |
35 | | template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS, |
36 | | size_t ROWS, size_t COLS> |
37 | | JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride, |
38 | | float* output, const size_t output_stride, |
39 | | float* JXL_RESTRICT block, |
40 | 54.3k | float* JXL_RESTRICT scratch_space) { |
41 | 54.3k | static_assert(LF_ROWS == ROWS, |
42 | 54.3k | "ReinterpretingDCT should only be called with LF == N"); |
43 | 54.3k | static_assert(LF_COLS == COLS, |
44 | 54.3k | "ReinterpretingDCT should only be called with LF == N"); |
45 | 54.3k | ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block, |
46 | 54.3k | scratch_space); |
47 | 54.3k | if (ROWS < COLS) { |
48 | 106k | for (size_t y = 0; y < LF_ROWS; y++) { |
49 | 159k | for (size_t x = 0; x < LF_COLS; x++) { |
50 | 106k | output[y * output_stride + x] = |
51 | 106k | block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) * |
52 | 106k | DCTTotalResampleScale<COLS, DCT_COLS>(x); |
53 | 106k | } |
54 | 53.0k | } |
55 | 52.9k | } else { |
56 | 4.15k | for (size_t y = 0; y < LF_COLS; y++) { |
57 | 9.91k | for (size_t x = 0; x < LF_ROWS; x++) { |
58 | 7.14k | output[y * output_stride + x] = |
59 | 7.14k | block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) * |
60 | 7.14k | DCTTotalResampleScale<ROWS, DCT_ROWS>(x); |
61 | 7.14k | } |
62 | 2.77k | } |
63 | 1.37k | } |
64 | 54.3k | } Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*) dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Line | Count | Source | 40 | 372 | float* JXL_RESTRICT scratch_space) { | 41 | 372 | static_assert(LF_ROWS == ROWS, | 42 | 372 | "ReinterpretingDCT should only be called with LF == N"); | 43 | 372 | static_assert(LF_COLS == COLS, | 44 | 372 | "ReinterpretingDCT should only be called with LF == N"); | 45 | 372 | ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block, | 46 | 372 | scratch_space); | 47 | 372 | if (ROWS < COLS) { | 48 | 0 | for (size_t y = 0; y < LF_ROWS; y++) { | 49 | 0 | for (size_t x = 0; x < LF_COLS; x++) { | 50 | 0 | output[y * output_stride + x] = | 51 | 0 | block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) * | 52 | 0 | DCTTotalResampleScale<COLS, DCT_COLS>(x); | 53 | 0 | } | 54 | 0 | } | 55 | 372 | } else { | 56 | 744 | for (size_t y = 0; y < LF_COLS; y++) { | 57 | 1.11k | for (size_t x = 0; x < LF_ROWS; x++) { | 58 | 744 | output[y * output_stride + x] = | 59 | 744 | block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) * | 60 | 744 | DCTTotalResampleScale<ROWS, DCT_ROWS>(x); | 61 | 744 | } | 62 | 372 | } | 63 | 372 | } | 64 | 372 | } |
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Line | Count | Source | 40 | 52.7k | float* JXL_RESTRICT scratch_space) { | 41 | 52.7k | static_assert(LF_ROWS == ROWS, | 42 | 52.7k | "ReinterpretingDCT should only be called with LF == N"); | 43 | 52.7k | static_assert(LF_COLS == COLS, | 44 | 52.7k | "ReinterpretingDCT should only be called with LF == N"); | 45 | 52.7k | ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block, | 46 | 52.7k | scratch_space); | 47 | 52.8k | if (ROWS < COLS) { | 48 | 105k | for (size_t y = 0; y < LF_ROWS; y++) { | 49 | 158k | for (size_t x = 0; x < LF_COLS; x++) { | 50 | 105k | output[y * output_stride + x] = | 51 | 105k | block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) * | 52 | 105k | DCTTotalResampleScale<COLS, DCT_COLS>(x); | 53 | 105k | } | 54 | 52.8k | } | 55 | 18.4E | } else { | 56 | 18.4E | for (size_t y = 0; y < LF_COLS; y++) { | 57 | 0 | for (size_t x = 0; x < LF_ROWS; x++) { | 58 | 0 | output[y * output_stride + x] = | 59 | 0 | block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) * | 60 | 0 | DCTTotalResampleScale<ROWS, DCT_ROWS>(x); | 61 | 0 | } | 62 | 0 | } | 63 | 18.4E | } | 64 | 52.7k | } |
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Line | Count | Source | 40 | 807 | float* JXL_RESTRICT scratch_space) { | 41 | 807 | static_assert(LF_ROWS == ROWS, | 42 | 807 | "ReinterpretingDCT should only be called with LF == N"); | 43 | 807 | static_assert(LF_COLS == COLS, | 44 | 807 | "ReinterpretingDCT should only be called with LF == N"); | 45 | 807 | ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block, | 46 | 807 | scratch_space); | 47 | 807 | if (ROWS < COLS) { | 48 | 0 | for (size_t y = 0; y < LF_ROWS; y++) { | 49 | 0 | for (size_t x = 0; x < LF_COLS; x++) { | 50 | 0 | output[y * output_stride + x] = | 51 | 0 | block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) * | 52 | 0 | DCTTotalResampleScale<COLS, DCT_COLS>(x); | 53 | 0 | } | 54 | 0 | } | 55 | 807 | } else { | 56 | 2.42k | for (size_t y = 0; y < LF_COLS; y++) { | 57 | 4.84k | for (size_t x = 0; x < LF_ROWS; x++) { | 58 | 3.22k | output[y * output_stride + x] = | 59 | 3.22k | block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) * | 60 | 3.22k | DCTTotalResampleScale<ROWS, DCT_ROWS>(x); | 61 | 3.22k | } | 62 | 1.61k | } | 63 | 807 | } | 64 | 807 | } |
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*) dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Line | Count | Source | 40 | 204 | float* JXL_RESTRICT scratch_space) { | 41 | 204 | static_assert(LF_ROWS == ROWS, | 42 | 204 | "ReinterpretingDCT should only be called with LF == N"); | 43 | 204 | static_assert(LF_COLS == COLS, | 44 | 204 | "ReinterpretingDCT should only be called with LF == N"); | 45 | 204 | ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block, | 46 | 204 | scratch_space); | 47 | 204 | if (ROWS < COLS) { | 48 | 0 | for (size_t y = 0; y < LF_ROWS; y++) { | 49 | 0 | for (size_t x = 0; x < LF_COLS; x++) { | 50 | 0 | output[y * output_stride + x] = | 51 | 0 | block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) * | 52 | 0 | DCTTotalResampleScale<COLS, DCT_COLS>(x); | 53 | 0 | } | 54 | 0 | } | 55 | 204 | } else { | 56 | 612 | for (size_t y = 0; y < LF_COLS; y++) { | 57 | 2.04k | for (size_t x = 0; x < LF_ROWS; x++) { | 58 | 1.63k | output[y * output_stride + x] = | 59 | 1.63k | block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) * | 60 | 1.63k | DCTTotalResampleScale<ROWS, DCT_ROWS>(x); | 61 | 1.63k | } | 62 | 408 | } | 63 | 204 | } | 64 | 204 | } |
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Line | Count | Source | 40 | 102 | float* JXL_RESTRICT scratch_space) { | 41 | 102 | static_assert(LF_ROWS == ROWS, | 42 | 102 | "ReinterpretingDCT should only be called with LF == N"); | 43 | 102 | static_assert(LF_COLS == COLS, | 44 | 102 | "ReinterpretingDCT should only be called with LF == N"); | 45 | 102 | ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block, | 46 | 102 | scratch_space); | 47 | 102 | if (ROWS < COLS) { | 48 | 306 | for (size_t y = 0; y < LF_ROWS; y++) { | 49 | 1.02k | for (size_t x = 0; x < LF_COLS; x++) { | 50 | 816 | output[y * output_stride + x] = | 51 | 816 | block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) * | 52 | 816 | DCTTotalResampleScale<COLS, DCT_COLS>(x); | 53 | 816 | } | 54 | 204 | } | 55 | 102 | } else { | 56 | 0 | for (size_t y = 0; y < LF_COLS; y++) { | 57 | 0 | for (size_t x = 0; x < LF_ROWS; x++) { | 58 | 0 | output[y * output_stride + x] = | 59 | 0 | block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) * | 60 | 0 | DCTTotalResampleScale<ROWS, DCT_ROWS>(x); | 61 | 0 | } | 62 | 0 | } | 63 | 0 | } | 64 | 102 | } |
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Line | Count | Source | 40 | 96 | float* JXL_RESTRICT scratch_space) { | 41 | 96 | static_assert(LF_ROWS == ROWS, | 42 | 96 | "ReinterpretingDCT should only be called with LF == N"); | 43 | 96 | static_assert(LF_COLS == COLS, | 44 | 96 | "ReinterpretingDCT should only be called with LF == N"); | 45 | 96 | ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block, | 46 | 96 | scratch_space); | 47 | 96 | if (ROWS < COLS) { | 48 | 0 | for (size_t y = 0; y < LF_ROWS; y++) { | 49 | 0 | for (size_t x = 0; x < LF_COLS; x++) { | 50 | 0 | output[y * output_stride + x] = | 51 | 0 | block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) * | 52 | 0 | DCTTotalResampleScale<COLS, DCT_COLS>(x); | 53 | 0 | } | 54 | 0 | } | 55 | 96 | } else { | 56 | 480 | for (size_t y = 0; y < LF_COLS; y++) { | 57 | 1.92k | for (size_t x = 0; x < LF_ROWS; x++) { | 58 | 1.53k | output[y * output_stride + x] = | 59 | 1.53k | block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) * | 60 | 1.53k | DCTTotalResampleScale<ROWS, DCT_ROWS>(x); | 61 | 1.53k | } | 62 | 384 | } | 63 | 96 | } | 64 | 96 | } |
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*) |
65 | | |
66 | | template <size_t S> |
67 | 441 | void IDCT2TopBlock(const float* block, size_t stride_out, float* out) { |
68 | 441 | static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); |
69 | 441 | static_assert(S % 2 == 0, "S should be even"); |
70 | 441 | float temp[kDCTBlockSize]; |
71 | 441 | constexpr size_t num_2x2 = S / 2; |
72 | 1.47k | for (size_t y = 0; y < num_2x2; y++) { |
73 | 4.11k | for (size_t x = 0; x < num_2x2; x++) { |
74 | 3.08k | float c00 = block[y * kBlockDim + x]; |
75 | 3.08k | float c01 = block[y * kBlockDim + num_2x2 + x]; |
76 | 3.08k | float c10 = block[(y + num_2x2) * kBlockDim + x]; |
77 | 3.08k | float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x]; |
78 | 3.08k | float r00 = c00 + c01 + c10 + c11; |
79 | 3.08k | float r01 = c00 + c01 - c10 - c11; |
80 | 3.08k | float r10 = c00 - c01 + c10 - c11; |
81 | 3.08k | float r11 = c00 - c01 - c10 + c11; |
82 | 3.08k | temp[y * 2 * kBlockDim + x * 2] = r00; |
83 | 3.08k | temp[y * 2 * kBlockDim + x * 2 + 1] = r01; |
84 | 3.08k | temp[(y * 2 + 1) * kBlockDim + x * 2] = r10; |
85 | 3.08k | temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11; |
86 | 3.08k | } |
87 | 1.02k | } |
88 | 2.49k | for (size_t y = 0; y < S; y++) { |
89 | 14.4k | for (size_t x = 0; x < S; x++) { |
90 | 12.3k | out[y * stride_out + x] = temp[y * kBlockDim + x]; |
91 | 12.3k | } |
92 | 2.05k | } |
93 | 441 | } Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*) dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*) Line | Count | Source | 67 | 147 | void IDCT2TopBlock(const float* block, size_t stride_out, float* out) { | 68 | 147 | static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); | 69 | 147 | static_assert(S % 2 == 0, "S should be even"); | 70 | 147 | float temp[kDCTBlockSize]; | 71 | 147 | constexpr size_t num_2x2 = S / 2; | 72 | 294 | for (size_t y = 0; y < num_2x2; y++) { | 73 | 294 | for (size_t x = 0; x < num_2x2; x++) { | 74 | 147 | float c00 = block[y * kBlockDim + x]; | 75 | 147 | float c01 = block[y * kBlockDim + num_2x2 + x]; | 76 | 147 | float c10 = block[(y + num_2x2) * kBlockDim + x]; | 77 | 147 | float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x]; | 78 | 147 | float r00 = c00 + c01 + c10 + c11; | 79 | 147 | float r01 = c00 + c01 - c10 - c11; | 80 | 147 | float r10 = c00 - c01 + c10 - c11; | 81 | 147 | float r11 = c00 - c01 - c10 + c11; | 82 | 147 | temp[y * 2 * kBlockDim + x * 2] = r00; | 83 | 147 | temp[y * 2 * kBlockDim + x * 2 + 1] = r01; | 84 | 147 | temp[(y * 2 + 1) * kBlockDim + x * 2] = r10; | 85 | 147 | temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11; | 86 | 147 | } | 87 | 147 | } | 88 | 441 | for (size_t y = 0; y < S; y++) { | 89 | 882 | for (size_t x = 0; x < S; x++) { | 90 | 588 | out[y * stride_out + x] = temp[y * kBlockDim + x]; | 91 | 588 | } | 92 | 294 | } | 93 | 147 | } |
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*) Line | Count | Source | 67 | 147 | void IDCT2TopBlock(const float* block, size_t stride_out, float* out) { | 68 | 147 | static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); | 69 | 147 | static_assert(S % 2 == 0, "S should be even"); | 70 | 147 | float temp[kDCTBlockSize]; | 71 | 147 | constexpr size_t num_2x2 = S / 2; | 72 | 441 | for (size_t y = 0; y < num_2x2; y++) { | 73 | 882 | for (size_t x = 0; x < num_2x2; x++) { | 74 | 588 | float c00 = block[y * kBlockDim + x]; | 75 | 588 | float c01 = block[y * kBlockDim + num_2x2 + x]; | 76 | 588 | float c10 = block[(y + num_2x2) * kBlockDim + x]; | 77 | 588 | float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x]; | 78 | 588 | float r00 = c00 + c01 + c10 + c11; | 79 | 588 | float r01 = c00 + c01 - c10 - c11; | 80 | 588 | float r10 = c00 - c01 + c10 - c11; | 81 | 588 | float r11 = c00 - c01 - c10 + c11; | 82 | 588 | temp[y * 2 * kBlockDim + x * 2] = r00; | 83 | 588 | temp[y * 2 * kBlockDim + x * 2 + 1] = r01; | 84 | 588 | temp[(y * 2 + 1) * kBlockDim + x * 2] = r10; | 85 | 588 | temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11; | 86 | 588 | } | 87 | 294 | } | 88 | 735 | for (size_t y = 0; y < S; y++) { | 89 | 2.94k | for (size_t x = 0; x < S; x++) { | 90 | 2.35k | out[y * stride_out + x] = temp[y * kBlockDim + x]; | 91 | 2.35k | } | 92 | 588 | } | 93 | 147 | } |
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*) Line | Count | Source | 67 | 147 | void IDCT2TopBlock(const float* block, size_t stride_out, float* out) { | 68 | 147 | static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); | 69 | 147 | static_assert(S % 2 == 0, "S should be even"); | 70 | 147 | float temp[kDCTBlockSize]; | 71 | 147 | constexpr size_t num_2x2 = S / 2; | 72 | 735 | for (size_t y = 0; y < num_2x2; y++) { | 73 | 2.94k | for (size_t x = 0; x < num_2x2; x++) { | 74 | 2.35k | float c00 = block[y * kBlockDim + x]; | 75 | 2.35k | float c01 = block[y * kBlockDim + num_2x2 + x]; | 76 | 2.35k | float c10 = block[(y + num_2x2) * kBlockDim + x]; | 77 | 2.35k | float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x]; | 78 | 2.35k | float r00 = c00 + c01 + c10 + c11; | 79 | 2.35k | float r01 = c00 + c01 - c10 - c11; | 80 | 2.35k | float r10 = c00 - c01 + c10 - c11; | 81 | 2.35k | float r11 = c00 - c01 - c10 + c11; | 82 | 2.35k | temp[y * 2 * kBlockDim + x * 2] = r00; | 83 | 2.35k | temp[y * 2 * kBlockDim + x * 2 + 1] = r01; | 84 | 2.35k | temp[(y * 2 + 1) * kBlockDim + x * 2] = r10; | 85 | 2.35k | temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11; | 86 | 2.35k | } | 87 | 588 | } | 88 | 1.32k | for (size_t y = 0; y < S; y++) { | 89 | 10.5k | for (size_t x = 0; x < S; x++) { | 90 | 9.40k | out[y * stride_out + x] = temp[y * kBlockDim + x]; | 91 | 9.40k | } | 92 | 1.17k | } | 93 | 147 | } |
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*) |
94 | | |
95 | 7.22k | void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) { |
96 | 7.22k | HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = { |
97 | 7.22k | { |
98 | 7.22k | 0.25, |
99 | 7.22k | 0.25, |
100 | 7.22k | 0.25, |
101 | 7.22k | 0.25, |
102 | 7.22k | 0.25, |
103 | 7.22k | 0.25, |
104 | 7.22k | 0.25, |
105 | 7.22k | 0.25, |
106 | 7.22k | 0.25, |
107 | 7.22k | 0.25, |
108 | 7.22k | 0.25, |
109 | 7.22k | 0.25, |
110 | 7.22k | 0.25, |
111 | 7.22k | 0.25, |
112 | 7.22k | 0.25, |
113 | 7.22k | 0.25, |
114 | 7.22k | }, |
115 | 7.22k | { |
116 | 7.22k | 0.876902929799142f, |
117 | 7.22k | 0.2206518106944235f, |
118 | 7.22k | -0.10140050393753763f, |
119 | 7.22k | -0.1014005039375375f, |
120 | 7.22k | 0.2206518106944236f, |
121 | 7.22k | -0.10140050393753777f, |
122 | 7.22k | -0.10140050393753772f, |
123 | 7.22k | -0.10140050393753763f, |
124 | 7.22k | -0.10140050393753758f, |
125 | 7.22k | -0.10140050393753769f, |
126 | 7.22k | -0.1014005039375375f, |
127 | 7.22k | -0.10140050393753768f, |
128 | 7.22k | -0.10140050393753768f, |
129 | 7.22k | -0.10140050393753759f, |
130 | 7.22k | -0.10140050393753763f, |
131 | 7.22k | -0.10140050393753741f, |
132 | 7.22k | }, |
133 | 7.22k | { |
134 | 7.22k | 0.0, |
135 | 7.22k | 0.0, |
136 | 7.22k | 0.40670075830260755f, |
137 | 7.22k | 0.44444816619734445f, |
138 | 7.22k | 0.0, |
139 | 7.22k | 0.0, |
140 | 7.22k | 0.19574399372042936f, |
141 | 7.22k | 0.2929100136981264f, |
142 | 7.22k | -0.40670075830260716f, |
143 | 7.22k | -0.19574399372042872f, |
144 | 7.22k | 0.0, |
145 | 7.22k | 0.11379074460448091f, |
146 | 7.22k | -0.44444816619734384f, |
147 | 7.22k | -0.29291001369812636f, |
148 | 7.22k | -0.1137907446044814f, |
149 | 7.22k | 0.0, |
150 | 7.22k | }, |
151 | 7.22k | { |
152 | 7.22k | 0.0, |
153 | 7.22k | 0.0, |
154 | 7.22k | -0.21255748058288748f, |
155 | 7.22k | 0.3085497062849767f, |
156 | 7.22k | 0.0, |
157 | 7.22k | 0.4706702258572536f, |
158 | 7.22k | -0.1621205195722993f, |
159 | 7.22k | 0.0, |
160 | 7.22k | -0.21255748058287047f, |
161 | 7.22k | -0.16212051957228327f, |
162 | 7.22k | -0.47067022585725277f, |
163 | 7.22k | -0.1464291867126764f, |
164 | 7.22k | 0.3085497062849487f, |
165 | 7.22k | 0.0, |
166 | 7.22k | -0.14642918671266536f, |
167 | 7.22k | 0.4251149611657548f, |
168 | 7.22k | }, |
169 | 7.22k | { |
170 | 7.22k | 0.0, |
171 | 7.22k | -0.7071067811865474f, |
172 | 7.22k | 0.0, |
173 | 7.22k | 0.0, |
174 | 7.22k | 0.7071067811865476f, |
175 | 7.22k | 0.0, |
176 | 7.22k | 0.0, |
177 | 7.22k | 0.0, |
178 | 7.22k | 0.0, |
179 | 7.22k | 0.0, |
180 | 7.22k | 0.0, |
181 | 7.22k | 0.0, |
182 | 7.22k | 0.0, |
183 | 7.22k | 0.0, |
184 | 7.22k | 0.0, |
185 | 7.22k | 0.0, |
186 | 7.22k | }, |
187 | 7.22k | { |
188 | 7.22k | -0.4105377591765233f, |
189 | 7.22k | 0.6235485373547691f, |
190 | 7.22k | -0.06435071657946274f, |
191 | 7.22k | -0.06435071657946266f, |
192 | 7.22k | 0.6235485373547694f, |
193 | 7.22k | -0.06435071657946284f, |
194 | 7.22k | -0.0643507165794628f, |
195 | 7.22k | -0.06435071657946274f, |
196 | 7.22k | -0.06435071657946272f, |
197 | 7.22k | -0.06435071657946279f, |
198 | 7.22k | -0.06435071657946266f, |
199 | 7.22k | -0.06435071657946277f, |
200 | 7.22k | -0.06435071657946277f, |
201 | 7.22k | -0.06435071657946273f, |
202 | 7.22k | -0.06435071657946274f, |
203 | 7.22k | -0.0643507165794626f, |
204 | 7.22k | }, |
205 | 7.22k | { |
206 | 7.22k | 0.0, |
207 | 7.22k | 0.0, |
208 | 7.22k | -0.4517556589999482f, |
209 | 7.22k | 0.15854503551840063f, |
210 | 7.22k | 0.0, |
211 | 7.22k | -0.04038515160822202f, |
212 | 7.22k | 0.0074182263792423875f, |
213 | 7.22k | 0.39351034269210167f, |
214 | 7.22k | -0.45175565899994635f, |
215 | 7.22k | 0.007418226379244351f, |
216 | 7.22k | 0.1107416575309343f, |
217 | 7.22k | 0.08298163094882051f, |
218 | 7.22k | 0.15854503551839705f, |
219 | 7.22k | 0.3935103426921022f, |
220 | 7.22k | 0.0829816309488214f, |
221 | 7.22k | -0.45175565899994796f, |
222 | 7.22k | }, |
223 | 7.22k | { |
224 | 7.22k | 0.0, |
225 | 7.22k | 0.0, |
226 | 7.22k | -0.304684750724869f, |
227 | 7.22k | 0.5112616136591823f, |
228 | 7.22k | 0.0, |
229 | 7.22k | 0.0, |
230 | 7.22k | -0.290480129728998f, |
231 | 7.22k | -0.06578701549142804f, |
232 | 7.22k | 0.304684750724884f, |
233 | 7.22k | 0.2904801297290076f, |
234 | 7.22k | 0.0, |
235 | 7.22k | -0.23889773523344604f, |
236 | 7.22k | -0.5112616136592012f, |
237 | 7.22k | 0.06578701549142545f, |
238 | 7.22k | 0.23889773523345467f, |
239 | 7.22k | 0.0, |
240 | 7.22k | }, |
241 | 7.22k | { |
242 | 7.22k | 0.0, |
243 | 7.22k | 0.0, |
244 | 7.22k | 0.3017929516615495f, |
245 | 7.22k | 0.25792362796341184f, |
246 | 7.22k | 0.0, |
247 | 7.22k | 0.16272340142866204f, |
248 | 7.22k | 0.09520022653475037f, |
249 | 7.22k | 0.0, |
250 | 7.22k | 0.3017929516615503f, |
251 | 7.22k | 0.09520022653475055f, |
252 | 7.22k | -0.16272340142866173f, |
253 | 7.22k | -0.35312385449816297f, |
254 | 7.22k | 0.25792362796341295f, |
255 | 7.22k | 0.0, |
256 | 7.22k | -0.3531238544981624f, |
257 | 7.22k | -0.6035859033230976f, |
258 | 7.22k | }, |
259 | 7.22k | { |
260 | 7.22k | 0.0, |
261 | 7.22k | 0.0, |
262 | 7.22k | 0.40824829046386274f, |
263 | 7.22k | 0.0, |
264 | 7.22k | 0.0, |
265 | 7.22k | 0.0, |
266 | 7.22k | 0.0, |
267 | 7.22k | -0.4082482904638628f, |
268 | 7.22k | -0.4082482904638635f, |
269 | 7.22k | 0.0, |
270 | 7.22k | 0.0, |
271 | 7.22k | -0.40824829046386296f, |
272 | 7.22k | 0.0, |
273 | 7.22k | 0.4082482904638634f, |
274 | 7.22k | 0.408248290463863f, |
275 | 7.22k | 0.0, |
276 | 7.22k | }, |
277 | 7.22k | { |
278 | 7.22k | 0.0, |
279 | 7.22k | 0.0, |
280 | 7.22k | 0.1747866975480809f, |
281 | 7.22k | 0.0812611176717539f, |
282 | 7.22k | 0.0, |
283 | 7.22k | 0.0, |
284 | 7.22k | -0.3675398009862027f, |
285 | 7.22k | -0.307882213957909f, |
286 | 7.22k | -0.17478669754808135f, |
287 | 7.22k | 0.3675398009862011f, |
288 | 7.22k | 0.0, |
289 | 7.22k | 0.4826689115059883f, |
290 | 7.22k | -0.08126111767175039f, |
291 | 7.22k | 0.30788221395790305f, |
292 | 7.22k | -0.48266891150598584f, |
293 | 7.22k | 0.0, |
294 | 7.22k | }, |
295 | 7.22k | { |
296 | 7.22k | 0.0, |
297 | 7.22k | 0.0, |
298 | 7.22k | -0.21105601049335784f, |
299 | 7.22k | 0.18567180916109802f, |
300 | 7.22k | 0.0, |
301 | 7.22k | 0.0, |
302 | 7.22k | 0.49215859013738733f, |
303 | 7.22k | -0.38525013709251915f, |
304 | 7.22k | 0.21105601049335806f, |
305 | 7.22k | -0.49215859013738905f, |
306 | 7.22k | 0.0, |
307 | 7.22k | 0.17419412659916217f, |
308 | 7.22k | -0.18567180916109904f, |
309 | 7.22k | 0.3852501370925211f, |
310 | 7.22k | -0.1741941265991621f, |
311 | 7.22k | 0.0, |
312 | 7.22k | }, |
313 | 7.22k | { |
314 | 7.22k | 0.0, |
315 | 7.22k | 0.0, |
316 | 7.22k | -0.14266084808807264f, |
317 | 7.22k | -0.3416446842253372f, |
318 | 7.22k | 0.0, |
319 | 7.22k | 0.7367497537172237f, |
320 | 7.22k | 0.24627107722075148f, |
321 | 7.22k | -0.08574019035519306f, |
322 | 7.22k | -0.14266084808807344f, |
323 | 7.22k | 0.24627107722075137f, |
324 | 7.22k | 0.14883399227113567f, |
325 | 7.22k | -0.04768680350229251f, |
326 | 7.22k | -0.3416446842253373f, |
327 | 7.22k | -0.08574019035519267f, |
328 | 7.22k | -0.047686803502292804f, |
329 | 7.22k | -0.14266084808807242f, |
330 | 7.22k | }, |
331 | 7.22k | { |
332 | 7.22k | 0.0, |
333 | 7.22k | 0.0, |
334 | 7.22k | -0.13813540350758585f, |
335 | 7.22k | 0.3302282550303788f, |
336 | 7.22k | 0.0, |
337 | 7.22k | 0.08755115000587084f, |
338 | 7.22k | -0.07946706605909573f, |
339 | 7.22k | -0.4613374887461511f, |
340 | 7.22k | -0.13813540350758294f, |
341 | 7.22k | -0.07946706605910261f, |
342 | 7.22k | 0.49724647109535086f, |
343 | 7.22k | 0.12538059448563663f, |
344 | 7.22k | 0.3302282550303805f, |
345 | 7.22k | -0.4613374887461554f, |
346 | 7.22k | 0.12538059448564315f, |
347 | 7.22k | -0.13813540350758452f, |
348 | 7.22k | }, |
349 | 7.22k | { |
350 | 7.22k | 0.0, |
351 | 7.22k | 0.0, |
352 | 7.22k | -0.17437602599651067f, |
353 | 7.22k | 0.0702790691196284f, |
354 | 7.22k | 0.0, |
355 | 7.22k | -0.2921026642334881f, |
356 | 7.22k | 0.3623817333531167f, |
357 | 7.22k | 0.0, |
358 | 7.22k | -0.1743760259965108f, |
359 | 7.22k | 0.36238173335311646f, |
360 | 7.22k | 0.29210266423348785f, |
361 | 7.22k | -0.4326608024727445f, |
362 | 7.22k | 0.07027906911962818f, |
363 | 7.22k | 0.0, |
364 | 7.22k | -0.4326608024727457f, |
365 | 7.22k | 0.34875205199302267f, |
366 | 7.22k | }, |
367 | 7.22k | { |
368 | 7.22k | 0.0, |
369 | 7.22k | 0.0, |
370 | 7.22k | 0.11354987314994337f, |
371 | 7.22k | -0.07417504595810355f, |
372 | 7.22k | 0.0, |
373 | 7.22k | 0.19402893032594343f, |
374 | 7.22k | -0.435190496523228f, |
375 | 7.22k | 0.21918684838857466f, |
376 | 7.22k | 0.11354987314994257f, |
377 | 7.22k | -0.4351904965232251f, |
378 | 7.22k | 0.5550443808910661f, |
379 | 7.22k | -0.25468277124066463f, |
380 | 7.22k | -0.07417504595810233f, |
381 | 7.22k | 0.2191868483885728f, |
382 | 7.22k | -0.25468277124066413f, |
383 | 7.22k | 0.1135498731499429f, |
384 | 7.22k | }, |
385 | 7.22k | }; |
386 | | |
387 | 7.22k | const HWY_CAPPED(float, 16) d; |
388 | 122k | for (size_t i = 0; i < 16; i += Lanes(d)) { |
389 | 115k | auto pixel = Zero(d); |
390 | 1.96M | for (size_t j = 0; j < 16; j++) { |
391 | 1.84M | auto cf = Set(d, coeffs[j]); |
392 | 1.84M | auto basis = Load(d, k4x4AFVBasis[j] + i); |
393 | 1.84M | pixel = MulAdd(cf, basis, pixel); |
394 | 1.84M | } |
395 | 115k | Store(pixel, d, pixels + i); |
396 | 115k | } |
397 | 7.22k | } Unexecuted instantiation: enc_group.cc:jxl::N_SCALAR::(anonymous namespace)::AFVIDCT4x4(float const*, float*) dec_group.cc:jxl::N_SCALAR::(anonymous namespace)::AFVIDCT4x4(float const*, float*) Line | Count | Source | 95 | 7.22k | void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) { | 96 | 7.22k | HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = { | 97 | 7.22k | { | 98 | 7.22k | 0.25, | 99 | 7.22k | 0.25, | 100 | 7.22k | 0.25, | 101 | 7.22k | 0.25, | 102 | 7.22k | 0.25, | 103 | 7.22k | 0.25, | 104 | 7.22k | 0.25, | 105 | 7.22k | 0.25, | 106 | 7.22k | 0.25, | 107 | 7.22k | 0.25, | 108 | 7.22k | 0.25, | 109 | 7.22k | 0.25, | 110 | 7.22k | 0.25, | 111 | 7.22k | 0.25, | 112 | 7.22k | 0.25, | 113 | 7.22k | 0.25, | 114 | 7.22k | }, | 115 | 7.22k | { | 116 | 7.22k | 0.876902929799142f, | 117 | 7.22k | 0.2206518106944235f, | 118 | 7.22k | -0.10140050393753763f, | 119 | 7.22k | -0.1014005039375375f, | 120 | 7.22k | 0.2206518106944236f, | 121 | 7.22k | -0.10140050393753777f, | 122 | 7.22k | -0.10140050393753772f, | 123 | 7.22k | -0.10140050393753763f, | 124 | 7.22k | -0.10140050393753758f, | 125 | 7.22k | -0.10140050393753769f, | 126 | 7.22k | -0.1014005039375375f, | 127 | 7.22k | -0.10140050393753768f, | 128 | 7.22k | -0.10140050393753768f, | 129 | 7.22k | -0.10140050393753759f, | 130 | 7.22k | -0.10140050393753763f, | 131 | 7.22k | -0.10140050393753741f, | 132 | 7.22k | }, | 133 | 7.22k | { | 134 | 7.22k | 0.0, | 135 | 7.22k | 0.0, | 136 | 7.22k | 0.40670075830260755f, | 137 | 7.22k | 0.44444816619734445f, | 138 | 7.22k | 0.0, | 139 | 7.22k | 0.0, | 140 | 7.22k | 0.19574399372042936f, | 141 | 7.22k | 0.2929100136981264f, | 142 | 7.22k | -0.40670075830260716f, | 143 | 7.22k | -0.19574399372042872f, | 144 | 7.22k | 0.0, | 145 | 7.22k | 0.11379074460448091f, | 146 | 7.22k | -0.44444816619734384f, | 147 | 7.22k | -0.29291001369812636f, | 148 | 7.22k | -0.1137907446044814f, | 149 | 7.22k | 0.0, | 150 | 7.22k | }, | 151 | 7.22k | { | 152 | 7.22k | 0.0, | 153 | 7.22k | 0.0, | 154 | 7.22k | -0.21255748058288748f, | 155 | 7.22k | 0.3085497062849767f, | 156 | 7.22k | 0.0, | 157 | 7.22k | 0.4706702258572536f, | 158 | 7.22k | -0.1621205195722993f, | 159 | 7.22k | 0.0, | 160 | 7.22k | -0.21255748058287047f, | 161 | 7.22k | -0.16212051957228327f, | 162 | 7.22k | -0.47067022585725277f, | 163 | 7.22k | -0.1464291867126764f, | 164 | 7.22k | 0.3085497062849487f, | 165 | 7.22k | 0.0, | 166 | 7.22k | -0.14642918671266536f, | 167 | 7.22k | 0.4251149611657548f, | 168 | 7.22k | }, | 169 | 7.22k | { | 170 | 7.22k | 0.0, | 171 | 7.22k | -0.7071067811865474f, | 172 | 7.22k | 0.0, | 173 | 7.22k | 0.0, | 174 | 7.22k | 0.7071067811865476f, | 175 | 7.22k | 0.0, | 176 | 7.22k | 0.0, | 177 | 7.22k | 0.0, | 178 | 7.22k | 0.0, | 179 | 7.22k | 0.0, | 180 | 7.22k | 0.0, | 181 | 7.22k | 0.0, | 182 | 7.22k | 0.0, | 183 | 7.22k | 0.0, | 184 | 7.22k | 0.0, | 185 | 7.22k | 0.0, | 186 | 7.22k | }, | 187 | 7.22k | { | 188 | 7.22k | -0.4105377591765233f, | 189 | 7.22k | 0.6235485373547691f, | 190 | 7.22k | -0.06435071657946274f, | 191 | 7.22k | -0.06435071657946266f, | 192 | 7.22k | 0.6235485373547694f, | 193 | 7.22k | -0.06435071657946284f, | 194 | 7.22k | -0.0643507165794628f, | 195 | 7.22k | -0.06435071657946274f, | 196 | 7.22k | -0.06435071657946272f, | 197 | 7.22k | -0.06435071657946279f, | 198 | 7.22k | -0.06435071657946266f, | 199 | 7.22k | -0.06435071657946277f, | 200 | 7.22k | -0.06435071657946277f, | 201 | 7.22k | -0.06435071657946273f, | 202 | 7.22k | -0.06435071657946274f, | 203 | 7.22k | -0.0643507165794626f, | 204 | 7.22k | }, | 205 | 7.22k | { | 206 | 7.22k | 0.0, | 207 | 7.22k | 0.0, | 208 | 7.22k | -0.4517556589999482f, | 209 | 7.22k | 0.15854503551840063f, | 210 | 7.22k | 0.0, | 211 | 7.22k | -0.04038515160822202f, | 212 | 7.22k | 0.0074182263792423875f, | 213 | 7.22k | 0.39351034269210167f, | 214 | 7.22k | -0.45175565899994635f, | 215 | 7.22k | 0.007418226379244351f, | 216 | 7.22k | 0.1107416575309343f, | 217 | 7.22k | 0.08298163094882051f, | 218 | 7.22k | 0.15854503551839705f, | 219 | 7.22k | 0.3935103426921022f, | 220 | 7.22k | 0.0829816309488214f, | 221 | 7.22k | -0.45175565899994796f, | 222 | 7.22k | }, | 223 | 7.22k | { | 224 | 7.22k | 0.0, | 225 | 7.22k | 0.0, | 226 | 7.22k | -0.304684750724869f, | 227 | 7.22k | 0.5112616136591823f, | 228 | 7.22k | 0.0, | 229 | 7.22k | 0.0, | 230 | 7.22k | -0.290480129728998f, | 231 | 7.22k | -0.06578701549142804f, | 232 | 7.22k | 0.304684750724884f, | 233 | 7.22k | 0.2904801297290076f, | 234 | 7.22k | 0.0, | 235 | 7.22k | -0.23889773523344604f, | 236 | 7.22k | -0.5112616136592012f, | 237 | 7.22k | 0.06578701549142545f, | 238 | 7.22k | 0.23889773523345467f, | 239 | 7.22k | 0.0, | 240 | 7.22k | }, | 241 | 7.22k | { | 242 | 7.22k | 0.0, | 243 | 7.22k | 0.0, | 244 | 7.22k | 0.3017929516615495f, | 245 | 7.22k | 0.25792362796341184f, | 246 | 7.22k | 0.0, | 247 | 7.22k | 0.16272340142866204f, | 248 | 7.22k | 0.09520022653475037f, | 249 | 7.22k | 0.0, | 250 | 7.22k | 0.3017929516615503f, | 251 | 7.22k | 0.09520022653475055f, | 252 | 7.22k | -0.16272340142866173f, | 253 | 7.22k | -0.35312385449816297f, | 254 | 7.22k | 0.25792362796341295f, | 255 | 7.22k | 0.0, | 256 | 7.22k | -0.3531238544981624f, | 257 | 7.22k | -0.6035859033230976f, | 258 | 7.22k | }, | 259 | 7.22k | { | 260 | 7.22k | 0.0, | 261 | 7.22k | 0.0, | 262 | 7.22k | 0.40824829046386274f, | 263 | 7.22k | 0.0, | 264 | 7.22k | 0.0, | 265 | 7.22k | 0.0, | 266 | 7.22k | 0.0, | 267 | 7.22k | -0.4082482904638628f, | 268 | 7.22k | -0.4082482904638635f, | 269 | 7.22k | 0.0, | 270 | 7.22k | 0.0, | 271 | 7.22k | -0.40824829046386296f, | 272 | 7.22k | 0.0, | 273 | 7.22k | 0.4082482904638634f, | 274 | 7.22k | 0.408248290463863f, | 275 | 7.22k | 0.0, | 276 | 7.22k | }, | 277 | 7.22k | { | 278 | 7.22k | 0.0, | 279 | 7.22k | 0.0, | 280 | 7.22k | 0.1747866975480809f, | 281 | 7.22k | 0.0812611176717539f, | 282 | 7.22k | 0.0, | 283 | 7.22k | 0.0, | 284 | 7.22k | -0.3675398009862027f, | 285 | 7.22k | -0.307882213957909f, | 286 | 7.22k | -0.17478669754808135f, | 287 | 7.22k | 0.3675398009862011f, | 288 | 7.22k | 0.0, | 289 | 7.22k | 0.4826689115059883f, | 290 | 7.22k | -0.08126111767175039f, | 291 | 7.22k | 0.30788221395790305f, | 292 | 7.22k | -0.48266891150598584f, | 293 | 7.22k | 0.0, | 294 | 7.22k | }, | 295 | 7.22k | { | 296 | 7.22k | 0.0, | 297 | 7.22k | 0.0, | 298 | 7.22k | -0.21105601049335784f, | 299 | 7.22k | 0.18567180916109802f, | 300 | 7.22k | 0.0, | 301 | 7.22k | 0.0, | 302 | 7.22k | 0.49215859013738733f, | 303 | 7.22k | -0.38525013709251915f, | 304 | 7.22k | 0.21105601049335806f, | 305 | 7.22k | -0.49215859013738905f, | 306 | 7.22k | 0.0, | 307 | 7.22k | 0.17419412659916217f, | 308 | 7.22k | -0.18567180916109904f, | 309 | 7.22k | 0.3852501370925211f, | 310 | 7.22k | -0.1741941265991621f, | 311 | 7.22k | 0.0, | 312 | 7.22k | }, | 313 | 7.22k | { | 314 | 7.22k | 0.0, | 315 | 7.22k | 0.0, | 316 | 7.22k | -0.14266084808807264f, | 317 | 7.22k | -0.3416446842253372f, | 318 | 7.22k | 0.0, | 319 | 7.22k | 0.7367497537172237f, | 320 | 7.22k | 0.24627107722075148f, | 321 | 7.22k | -0.08574019035519306f, | 322 | 7.22k | -0.14266084808807344f, | 323 | 7.22k | 0.24627107722075137f, | 324 | 7.22k | 0.14883399227113567f, | 325 | 7.22k | -0.04768680350229251f, | 326 | 7.22k | -0.3416446842253373f, | 327 | 7.22k | -0.08574019035519267f, | 328 | 7.22k | -0.047686803502292804f, | 329 | 7.22k | -0.14266084808807242f, | 330 | 7.22k | }, | 331 | 7.22k | { | 332 | 7.22k | 0.0, | 333 | 7.22k | 0.0, | 334 | 7.22k | -0.13813540350758585f, | 335 | 7.22k | 0.3302282550303788f, | 336 | 7.22k | 0.0, | 337 | 7.22k | 0.08755115000587084f, | 338 | 7.22k | -0.07946706605909573f, | 339 | 7.22k | -0.4613374887461511f, | 340 | 7.22k | -0.13813540350758294f, | 341 | 7.22k | -0.07946706605910261f, | 342 | 7.22k | 0.49724647109535086f, | 343 | 7.22k | 0.12538059448563663f, | 344 | 7.22k | 0.3302282550303805f, | 345 | 7.22k | -0.4613374887461554f, | 346 | 7.22k | 0.12538059448564315f, | 347 | 7.22k | -0.13813540350758452f, | 348 | 7.22k | }, | 349 | 7.22k | { | 350 | 7.22k | 0.0, | 351 | 7.22k | 0.0, | 352 | 7.22k | -0.17437602599651067f, | 353 | 7.22k | 0.0702790691196284f, | 354 | 7.22k | 0.0, | 355 | 7.22k | -0.2921026642334881f, | 356 | 7.22k | 0.3623817333531167f, | 357 | 7.22k | 0.0, | 358 | 7.22k | -0.1743760259965108f, | 359 | 7.22k | 0.36238173335311646f, | 360 | 7.22k | 0.29210266423348785f, | 361 | 7.22k | -0.4326608024727445f, | 362 | 7.22k | 0.07027906911962818f, | 363 | 7.22k | 0.0, | 364 | 7.22k | -0.4326608024727457f, | 365 | 7.22k | 0.34875205199302267f, | 366 | 7.22k | }, | 367 | 7.22k | { | 368 | 7.22k | 0.0, | 369 | 7.22k | 0.0, | 370 | 7.22k | 0.11354987314994337f, | 371 | 7.22k | -0.07417504595810355f, | 372 | 7.22k | 0.0, | 373 | 7.22k | 0.19402893032594343f, | 374 | 7.22k | -0.435190496523228f, | 375 | 7.22k | 0.21918684838857466f, | 376 | 7.22k | 0.11354987314994257f, | 377 | 7.22k | -0.4351904965232251f, | 378 | 7.22k | 0.5550443808910661f, | 379 | 7.22k | -0.25468277124066463f, | 380 | 7.22k | -0.07417504595810233f, | 381 | 7.22k | 0.2191868483885728f, | 382 | 7.22k | -0.25468277124066413f, | 383 | 7.22k | 0.1135498731499429f, | 384 | 7.22k | }, | 385 | 7.22k | }; | 386 | | | 387 | 7.22k | const HWY_CAPPED(float, 16) d; | 388 | 122k | for (size_t i = 0; i < 16; i += Lanes(d)) { | 389 | 115k | auto pixel = Zero(d); | 390 | 1.96M | for (size_t j = 0; j < 16; j++) { | 391 | 1.84M | auto cf = Set(d, coeffs[j]); | 392 | 1.84M | auto basis = Load(d, k4x4AFVBasis[j] + i); | 393 | 1.84M | pixel = MulAdd(cf, basis, pixel); | 394 | 1.84M | } | 395 | 115k | Store(pixel, d, pixels + i); | 396 | 115k | } | 397 | 7.22k | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SCALAR::(anonymous namespace)::AFVIDCT4x4(float const*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SCALAR::(anonymous namespace)::AFVIDCT4x4(float const*, float*) |
398 | | |
399 | | template <size_t afv_kind> |
400 | | void AFVTransformToPixels(const float* JXL_RESTRICT coefficients, |
401 | 7.22k | float* JXL_RESTRICT pixels, size_t pixels_stride) { |
402 | 7.22k | HWY_ALIGN float scratch_space[4 * 8 * 4]; |
403 | 7.22k | size_t afv_x = afv_kind & 1; |
404 | 7.22k | size_t afv_y = afv_kind / 2; |
405 | 7.22k | float dcs[3] = {}; |
406 | 7.22k | float block00 = coefficients[0]; |
407 | 7.22k | float block01 = coefficients[1]; |
408 | 7.22k | float block10 = coefficients[8]; |
409 | 7.22k | dcs[0] = (block00 + block10 + block01) * 4.0f; |
410 | 7.22k | dcs[1] = (block00 + block10 - block01); |
411 | 7.22k | dcs[2] = block00 - block10; |
412 | | // IAFV: (even, even) positions. |
413 | 7.22k | HWY_ALIGN float coeff[4 * 4]; |
414 | 7.22k | coeff[0] = dcs[0]; |
415 | 36.1k | for (size_t iy = 0; iy < 4; iy++) { |
416 | 144k | for (size_t ix = 0; ix < 4; ix++) { |
417 | 115k | if (ix == 0 && iy == 0) continue; |
418 | 108k | coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2]; |
419 | 108k | } |
420 | 28.8k | } |
421 | 7.22k | HWY_ALIGN float block[4 * 8]; |
422 | 7.22k | AFVIDCT4x4(coeff, block); |
423 | 36.1k | for (size_t iy = 0; iy < 4; iy++) { |
424 | 144k | for (size_t ix = 0; ix < 4; ix++) { |
425 | 115k | pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] = |
426 | 115k | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)]; |
427 | 115k | } |
428 | 28.8k | } |
429 | | // IDCT4x4 in (odd, even) positions. |
430 | 7.22k | block[0] = dcs[1]; |
431 | 36.1k | for (size_t iy = 0; iy < 4; iy++) { |
432 | 144k | for (size_t ix = 0; ix < 4; ix++) { |
433 | 115k | if (ix == 0 && iy == 0) continue; |
434 | 108k | block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1]; |
435 | 108k | } |
436 | 28.8k | } |
437 | 7.22k | ComputeScaledIDCT<4, 4>()( |
438 | 7.22k | block, |
439 | 7.22k | DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), |
440 | 7.22k | pixels_stride), |
441 | 7.22k | scratch_space); |
442 | | // IDCT4x8. |
443 | 7.22k | block[0] = dcs[2]; |
444 | 36.1k | for (size_t iy = 0; iy < 4; iy++) { |
445 | 259k | for (size_t ix = 0; ix < 8; ix++) { |
446 | 231k | if (ix == 0 && iy == 0) continue; |
447 | 223k | block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix]; |
448 | 223k | } |
449 | 28.8k | } |
450 | 7.22k | ComputeScaledIDCT<4, 8>()( |
451 | 7.22k | block, |
452 | 7.22k | DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride), |
453 | 7.22k | scratch_space); |
454 | 7.22k | } Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long) Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long) dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long) Line | Count | Source | 401 | 54 | float* JXL_RESTRICT pixels, size_t pixels_stride) { | 402 | 54 | HWY_ALIGN float scratch_space[4 * 8 * 4]; | 403 | 54 | size_t afv_x = afv_kind & 1; | 404 | 54 | size_t afv_y = afv_kind / 2; | 405 | 54 | float dcs[3] = {}; | 406 | 54 | float block00 = coefficients[0]; | 407 | 54 | float block01 = coefficients[1]; | 408 | 54 | float block10 = coefficients[8]; | 409 | 54 | dcs[0] = (block00 + block10 + block01) * 4.0f; | 410 | 54 | dcs[1] = (block00 + block10 - block01); | 411 | 54 | dcs[2] = block00 - block10; | 412 | | // IAFV: (even, even) positions. | 413 | 54 | HWY_ALIGN float coeff[4 * 4]; | 414 | 54 | coeff[0] = dcs[0]; | 415 | 270 | for (size_t iy = 0; iy < 4; iy++) { | 416 | 1.08k | for (size_t ix = 0; ix < 4; ix++) { | 417 | 864 | if (ix == 0 && iy == 0) continue; | 418 | 810 | coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2]; | 419 | 810 | } | 420 | 216 | } | 421 | 54 | HWY_ALIGN float block[4 * 8]; | 422 | 54 | AFVIDCT4x4(coeff, block); | 423 | 270 | for (size_t iy = 0; iy < 4; iy++) { | 424 | 1.08k | for (size_t ix = 0; ix < 4; ix++) { | 425 | 864 | pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] = | 426 | 864 | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)]; | 427 | 864 | } | 428 | 216 | } | 429 | | // IDCT4x4 in (odd, even) positions. | 430 | 54 | block[0] = dcs[1]; | 431 | 270 | for (size_t iy = 0; iy < 4; iy++) { | 432 | 1.08k | for (size_t ix = 0; ix < 4; ix++) { | 433 | 864 | if (ix == 0 && iy == 0) continue; | 434 | 810 | block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1]; | 435 | 810 | } | 436 | 216 | } | 437 | 54 | ComputeScaledIDCT<4, 4>()( | 438 | 54 | block, | 439 | 54 | DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), | 440 | 54 | pixels_stride), | 441 | 54 | scratch_space); | 442 | | // IDCT4x8. | 443 | 54 | block[0] = dcs[2]; | 444 | 270 | for (size_t iy = 0; iy < 4; iy++) { | 445 | 1.94k | for (size_t ix = 0; ix < 8; ix++) { | 446 | 1.72k | if (ix == 0 && iy == 0) continue; | 447 | 1.67k | block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix]; | 448 | 1.67k | } | 449 | 216 | } | 450 | 54 | ComputeScaledIDCT<4, 8>()( | 451 | 54 | block, | 452 | 54 | DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride), | 453 | 54 | scratch_space); | 454 | 54 | } |
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long) Line | Count | Source | 401 | 30 | float* JXL_RESTRICT pixels, size_t pixels_stride) { | 402 | 30 | HWY_ALIGN float scratch_space[4 * 8 * 4]; | 403 | 30 | size_t afv_x = afv_kind & 1; | 404 | 30 | size_t afv_y = afv_kind / 2; | 405 | 30 | float dcs[3] = {}; | 406 | 30 | float block00 = coefficients[0]; | 407 | 30 | float block01 = coefficients[1]; | 408 | 30 | float block10 = coefficients[8]; | 409 | 30 | dcs[0] = (block00 + block10 + block01) * 4.0f; | 410 | 30 | dcs[1] = (block00 + block10 - block01); | 411 | 30 | dcs[2] = block00 - block10; | 412 | | // IAFV: (even, even) positions. | 413 | 30 | HWY_ALIGN float coeff[4 * 4]; | 414 | 30 | coeff[0] = dcs[0]; | 415 | 150 | for (size_t iy = 0; iy < 4; iy++) { | 416 | 600 | for (size_t ix = 0; ix < 4; ix++) { | 417 | 480 | if (ix == 0 && iy == 0) continue; | 418 | 450 | coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2]; | 419 | 450 | } | 420 | 120 | } | 421 | 30 | HWY_ALIGN float block[4 * 8]; | 422 | 30 | AFVIDCT4x4(coeff, block); | 423 | 150 | for (size_t iy = 0; iy < 4; iy++) { | 424 | 600 | for (size_t ix = 0; ix < 4; ix++) { | 425 | 480 | pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] = | 426 | 480 | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)]; | 427 | 480 | } | 428 | 120 | } | 429 | | // IDCT4x4 in (odd, even) positions. | 430 | 30 | block[0] = dcs[1]; | 431 | 150 | for (size_t iy = 0; iy < 4; iy++) { | 432 | 600 | for (size_t ix = 0; ix < 4; ix++) { | 433 | 480 | if (ix == 0 && iy == 0) continue; | 434 | 450 | block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1]; | 435 | 450 | } | 436 | 120 | } | 437 | 30 | ComputeScaledIDCT<4, 4>()( | 438 | 30 | block, | 439 | 30 | DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), | 440 | 30 | pixels_stride), | 441 | 30 | scratch_space); | 442 | | // IDCT4x8. | 443 | 30 | block[0] = dcs[2]; | 444 | 150 | for (size_t iy = 0; iy < 4; iy++) { | 445 | 1.08k | for (size_t ix = 0; ix < 8; ix++) { | 446 | 960 | if (ix == 0 && iy == 0) continue; | 447 | 930 | block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix]; | 448 | 930 | } | 449 | 120 | } | 450 | 30 | ComputeScaledIDCT<4, 8>()( | 451 | 30 | block, | 452 | 30 | DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride), | 453 | 30 | scratch_space); | 454 | 30 | } |
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long) Line | Count | Source | 401 | 495 | float* JXL_RESTRICT pixels, size_t pixels_stride) { | 402 | 495 | HWY_ALIGN float scratch_space[4 * 8 * 4]; | 403 | 495 | size_t afv_x = afv_kind & 1; | 404 | 495 | size_t afv_y = afv_kind / 2; | 405 | 495 | float dcs[3] = {}; | 406 | 495 | float block00 = coefficients[0]; | 407 | 495 | float block01 = coefficients[1]; | 408 | 495 | float block10 = coefficients[8]; | 409 | 495 | dcs[0] = (block00 + block10 + block01) * 4.0f; | 410 | 495 | dcs[1] = (block00 + block10 - block01); | 411 | 495 | dcs[2] = block00 - block10; | 412 | | // IAFV: (even, even) positions. | 413 | 495 | HWY_ALIGN float coeff[4 * 4]; | 414 | 495 | coeff[0] = dcs[0]; | 415 | 2.47k | for (size_t iy = 0; iy < 4; iy++) { | 416 | 9.90k | for (size_t ix = 0; ix < 4; ix++) { | 417 | 7.92k | if (ix == 0 && iy == 0) continue; | 418 | 7.42k | coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2]; | 419 | 7.42k | } | 420 | 1.98k | } | 421 | 495 | HWY_ALIGN float block[4 * 8]; | 422 | 495 | AFVIDCT4x4(coeff, block); | 423 | 2.47k | for (size_t iy = 0; iy < 4; iy++) { | 424 | 9.90k | for (size_t ix = 0; ix < 4; ix++) { | 425 | 7.92k | pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] = | 426 | 7.92k | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)]; | 427 | 7.92k | } | 428 | 1.98k | } | 429 | | // IDCT4x4 in (odd, even) positions. | 430 | 495 | block[0] = dcs[1]; | 431 | 2.47k | for (size_t iy = 0; iy < 4; iy++) { | 432 | 9.90k | for (size_t ix = 0; ix < 4; ix++) { | 433 | 7.92k | if (ix == 0 && iy == 0) continue; | 434 | 7.42k | block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1]; | 435 | 7.42k | } | 436 | 1.98k | } | 437 | 495 | ComputeScaledIDCT<4, 4>()( | 438 | 495 | block, | 439 | 495 | DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), | 440 | 495 | pixels_stride), | 441 | 495 | scratch_space); | 442 | | // IDCT4x8. | 443 | 495 | block[0] = dcs[2]; | 444 | 2.47k | for (size_t iy = 0; iy < 4; iy++) { | 445 | 17.8k | for (size_t ix = 0; ix < 8; ix++) { | 446 | 15.8k | if (ix == 0 && iy == 0) continue; | 447 | 15.3k | block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix]; | 448 | 15.3k | } | 449 | 1.98k | } | 450 | 495 | ComputeScaledIDCT<4, 8>()( | 451 | 495 | block, | 452 | 495 | DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride), | 453 | 495 | scratch_space); | 454 | 495 | } |
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long) Line | Count | Source | 401 | 6.64k | float* JXL_RESTRICT pixels, size_t pixels_stride) { | 402 | 6.64k | HWY_ALIGN float scratch_space[4 * 8 * 4]; | 403 | 6.64k | size_t afv_x = afv_kind & 1; | 404 | 6.64k | size_t afv_y = afv_kind / 2; | 405 | 6.64k | float dcs[3] = {}; | 406 | 6.64k | float block00 = coefficients[0]; | 407 | 6.64k | float block01 = coefficients[1]; | 408 | 6.64k | float block10 = coefficients[8]; | 409 | 6.64k | dcs[0] = (block00 + block10 + block01) * 4.0f; | 410 | 6.64k | dcs[1] = (block00 + block10 - block01); | 411 | 6.64k | dcs[2] = block00 - block10; | 412 | | // IAFV: (even, even) positions. | 413 | 6.64k | HWY_ALIGN float coeff[4 * 4]; | 414 | 6.64k | coeff[0] = dcs[0]; | 415 | 33.2k | for (size_t iy = 0; iy < 4; iy++) { | 416 | 132k | for (size_t ix = 0; ix < 4; ix++) { | 417 | 106k | if (ix == 0 && iy == 0) continue; | 418 | 99.6k | coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2]; | 419 | 99.6k | } | 420 | 26.5k | } | 421 | 6.64k | HWY_ALIGN float block[4 * 8]; | 422 | 6.64k | AFVIDCT4x4(coeff, block); | 423 | 33.2k | for (size_t iy = 0; iy < 4; iy++) { | 424 | 132k | for (size_t ix = 0; ix < 4; ix++) { | 425 | 106k | pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] = | 426 | 106k | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)]; | 427 | 106k | } | 428 | 26.5k | } | 429 | | // IDCT4x4 in (odd, even) positions. | 430 | 6.64k | block[0] = dcs[1]; | 431 | 33.2k | for (size_t iy = 0; iy < 4; iy++) { | 432 | 132k | for (size_t ix = 0; ix < 4; ix++) { | 433 | 106k | if (ix == 0 && iy == 0) continue; | 434 | 99.6k | block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1]; | 435 | 99.6k | } | 436 | 26.5k | } | 437 | 6.64k | ComputeScaledIDCT<4, 4>()( | 438 | 6.64k | block, | 439 | 6.64k | DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), | 440 | 6.64k | pixels_stride), | 441 | 6.64k | scratch_space); | 442 | | // IDCT4x8. | 443 | 6.64k | block[0] = dcs[2]; | 444 | 33.2k | for (size_t iy = 0; iy < 4; iy++) { | 445 | 239k | for (size_t ix = 0; ix < 8; ix++) { | 446 | 212k | if (ix == 0 && iy == 0) continue; | 447 | 205k | block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix]; | 448 | 205k | } | 449 | 26.5k | } | 450 | 6.64k | ComputeScaledIDCT<4, 8>()( | 451 | 6.64k | block, | 452 | 6.64k | DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride), | 453 | 6.64k | scratch_space); | 454 | 6.64k | } |
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long) |
455 | | |
456 | | HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy, |
457 | | float* JXL_RESTRICT coefficients, |
458 | | float* JXL_RESTRICT pixels, |
459 | | size_t pixels_stride, |
460 | 99.1k | float* scratch_space) { |
461 | 99.1k | using Type = AcStrategyType; |
462 | 99.1k | switch (strategy) { |
463 | 19.1k | case Type::IDENTITY: { |
464 | 19.1k | float dcs[4] = {}; |
465 | 19.1k | float block00 = coefficients[0]; |
466 | 19.1k | float block01 = coefficients[1]; |
467 | 19.1k | float block10 = coefficients[8]; |
468 | 19.1k | float block11 = coefficients[9]; |
469 | 19.1k | dcs[0] = block00 + block01 + block10 + block11; |
470 | 19.1k | dcs[1] = block00 + block01 - block10 - block11; |
471 | 19.1k | dcs[2] = block00 - block01 + block10 - block11; |
472 | 19.1k | dcs[3] = block00 - block01 - block10 + block11; |
473 | 57.5k | for (size_t y = 0; y < 2; y++) { |
474 | 115k | for (size_t x = 0; x < 2; x++) { |
475 | 76.7k | float block_dc = dcs[y * 2 + x]; |
476 | 76.7k | float residual_sum = 0; |
477 | 383k | for (size_t iy = 0; iy < 4; iy++) { |
478 | 1.53M | for (size_t ix = 0; ix < 4; ix++) { |
479 | 1.22M | if (ix == 0 && iy == 0) continue; |
480 | 1.15M | residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2]; |
481 | 1.15M | } |
482 | 307k | } |
483 | 76.7k | pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] = |
484 | 76.7k | block_dc - residual_sum * (1.0f / 16); |
485 | 383k | for (size_t iy = 0; iy < 4; iy++) { |
486 | 1.53M | for (size_t ix = 0; ix < 4; ix++) { |
487 | 1.22M | if (ix == 1 && iy == 1) continue; |
488 | 1.15M | pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] = |
489 | 1.15M | coefficients[(y + iy * 2) * 8 + x + ix * 2] + |
490 | 1.15M | pixels[(4 * y + 1) * pixels_stride + 4 * x + 1]; |
491 | 1.15M | } |
492 | 307k | } |
493 | 76.7k | pixels[y * 4 * pixels_stride + x * 4] = |
494 | 76.7k | coefficients[(y + 2) * 8 + x + 2] + |
495 | 76.7k | pixels[(4 * y + 1) * pixels_stride + 4 * x + 1]; |
496 | 76.7k | } |
497 | 38.3k | } |
498 | 19.1k | break; |
499 | 0 | } |
500 | 873 | case Type::DCT8X4: { |
501 | 873 | float dcs[2] = {}; |
502 | 873 | float block0 = coefficients[0]; |
503 | 873 | float block1 = coefficients[8]; |
504 | 873 | dcs[0] = block0 + block1; |
505 | 873 | dcs[1] = block0 - block1; |
506 | 2.61k | for (size_t x = 0; x < 2; x++) { |
507 | 1.74k | HWY_ALIGN float block[4 * 8]; |
508 | 1.74k | block[0] = dcs[x]; |
509 | 8.73k | for (size_t iy = 0; iy < 4; iy++) { |
510 | 62.8k | for (size_t ix = 0; ix < 8; ix++) { |
511 | 55.8k | if (ix == 0 && iy == 0) continue; |
512 | 54.1k | block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix]; |
513 | 54.1k | } |
514 | 6.98k | } |
515 | 1.74k | ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride), |
516 | 1.74k | scratch_space); |
517 | 1.74k | } |
518 | 873 | break; |
519 | 0 | } |
520 | 645 | case Type::DCT4X8: { |
521 | 645 | float dcs[2] = {}; |
522 | 645 | float block0 = coefficients[0]; |
523 | 645 | float block1 = coefficients[8]; |
524 | 645 | dcs[0] = block0 + block1; |
525 | 645 | dcs[1] = block0 - block1; |
526 | 1.93k | for (size_t y = 0; y < 2; y++) { |
527 | 1.29k | HWY_ALIGN float block[4 * 8]; |
528 | 1.29k | block[0] = dcs[y]; |
529 | 6.45k | for (size_t iy = 0; iy < 4; iy++) { |
530 | 46.4k | for (size_t ix = 0; ix < 8; ix++) { |
531 | 41.2k | if (ix == 0 && iy == 0) continue; |
532 | 39.9k | block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix]; |
533 | 39.9k | } |
534 | 5.16k | } |
535 | 1.29k | ComputeScaledIDCT<4, 8>()( |
536 | 1.29k | block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride), |
537 | 1.29k | scratch_space); |
538 | 1.29k | } |
539 | 645 | break; |
540 | 0 | } |
541 | 0 | case Type::DCT4X4: { |
542 | 0 | float dcs[4] = {}; |
543 | 0 | float block00 = coefficients[0]; |
544 | 0 | float block01 = coefficients[1]; |
545 | 0 | float block10 = coefficients[8]; |
546 | 0 | float block11 = coefficients[9]; |
547 | 0 | dcs[0] = block00 + block01 + block10 + block11; |
548 | 0 | dcs[1] = block00 + block01 - block10 - block11; |
549 | 0 | dcs[2] = block00 - block01 + block10 - block11; |
550 | 0 | dcs[3] = block00 - block01 - block10 + block11; |
551 | 0 | for (size_t y = 0; y < 2; y++) { |
552 | 0 | for (size_t x = 0; x < 2; x++) { |
553 | 0 | HWY_ALIGN float block[4 * 4]; |
554 | 0 | block[0] = dcs[y * 2 + x]; |
555 | 0 | for (size_t iy = 0; iy < 4; iy++) { |
556 | 0 | for (size_t ix = 0; ix < 4; ix++) { |
557 | 0 | if (ix == 0 && iy == 0) continue; |
558 | 0 | block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2]; |
559 | 0 | } |
560 | 0 | } |
561 | 0 | ComputeScaledIDCT<4, 4>()( |
562 | 0 | block, |
563 | 0 | DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride), |
564 | 0 | scratch_space); |
565 | 0 | } |
566 | 0 | } |
567 | 0 | break; |
568 | 0 | } |
569 | 147 | case Type::DCT2X2: { |
570 | 147 | HWY_ALIGN float coeffs[kDCTBlockSize]; |
571 | 147 | memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize); |
572 | 147 | IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs); |
573 | 147 | IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs); |
574 | 147 | IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs); |
575 | 1.32k | for (size_t y = 0; y < kBlockDim; y++) { |
576 | 10.5k | for (size_t x = 0; x < kBlockDim; x++) { |
577 | 9.40k | pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x]; |
578 | 9.40k | } |
579 | 1.17k | } |
580 | 147 | break; |
581 | 0 | } |
582 | 807 | case Type::DCT16X16: { |
583 | 807 | ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride), |
584 | 807 | scratch_space); |
585 | 807 | break; |
586 | 0 | } |
587 | 372 | case Type::DCT16X8: { |
588 | 372 | ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride), |
589 | 372 | scratch_space); |
590 | 372 | break; |
591 | 0 | } |
592 | 52.7k | case Type::DCT8X16: { |
593 | 52.7k | ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride), |
594 | 52.7k | scratch_space); |
595 | 52.7k | break; |
596 | 0 | } |
597 | 0 | case Type::DCT32X8: { |
598 | 0 | ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride), |
599 | 0 | scratch_space); |
600 | 0 | break; |
601 | 0 | } |
602 | 0 | case Type::DCT8X32: { |
603 | 0 | ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride), |
604 | 0 | scratch_space); |
605 | 0 | break; |
606 | 0 | } |
607 | 204 | case Type::DCT32X16: { |
608 | 204 | ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride), |
609 | 204 | scratch_space); |
610 | 204 | break; |
611 | 0 | } |
612 | 102 | case Type::DCT16X32: { |
613 | 102 | ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride), |
614 | 102 | scratch_space); |
615 | 102 | break; |
616 | 0 | } |
617 | 96 | case Type::DCT32X32: { |
618 | 96 | ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride), |
619 | 96 | scratch_space); |
620 | 96 | break; |
621 | 0 | } |
622 | 16.7k | case Type::DCT: { |
623 | 16.7k | ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride), |
624 | 16.7k | scratch_space); |
625 | 16.7k | break; |
626 | 0 | } |
627 | 54 | case Type::AFV0: { |
628 | 54 | AFVTransformToPixels<0>(coefficients, pixels, pixels_stride); |
629 | 54 | break; |
630 | 0 | } |
631 | 30 | case Type::AFV1: { |
632 | 30 | AFVTransformToPixels<1>(coefficients, pixels, pixels_stride); |
633 | 30 | break; |
634 | 0 | } |
635 | 495 | case Type::AFV2: { |
636 | 495 | AFVTransformToPixels<2>(coefficients, pixels, pixels_stride); |
637 | 495 | break; |
638 | 0 | } |
639 | 6.64k | case Type::AFV3: { |
640 | 6.64k | AFVTransformToPixels<3>(coefficients, pixels, pixels_stride); |
641 | 6.64k | break; |
642 | 0 | } |
643 | 0 | case Type::DCT64X32: { |
644 | 0 | ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride), |
645 | 0 | scratch_space); |
646 | 0 | break; |
647 | 0 | } |
648 | 0 | case Type::DCT32X64: { |
649 | 0 | ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride), |
650 | 0 | scratch_space); |
651 | 0 | break; |
652 | 0 | } |
653 | 0 | case Type::DCT64X64: { |
654 | 0 | ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride), |
655 | 0 | scratch_space); |
656 | 0 | break; |
657 | 0 | } |
658 | 0 | case Type::DCT128X64: { |
659 | 0 | ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride), |
660 | 0 | scratch_space); |
661 | 0 | break; |
662 | 0 | } |
663 | 0 | case Type::DCT64X128: { |
664 | 0 | ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride), |
665 | 0 | scratch_space); |
666 | 0 | break; |
667 | 0 | } |
668 | 0 | case Type::DCT128X128: { |
669 | 0 | ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride), |
670 | 0 | scratch_space); |
671 | 0 | break; |
672 | 0 | } |
673 | 0 | case Type::DCT256X128: { |
674 | 0 | ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride), |
675 | 0 | scratch_space); |
676 | 0 | break; |
677 | 0 | } |
678 | 0 | case Type::DCT128X256: { |
679 | 0 | ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride), |
680 | 0 | scratch_space); |
681 | 0 | break; |
682 | 0 | } |
683 | 0 | case Type::DCT256X256: { |
684 | 0 | ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride), |
685 | 0 | scratch_space); |
686 | 0 | break; |
687 | 0 | } |
688 | 99.1k | } |
689 | 99.1k | } Unexecuted instantiation: enc_group.cc:jxl::N_SCALAR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*) dec_group.cc:jxl::N_SCALAR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*) Line | Count | Source | 460 | 99.1k | float* scratch_space) { | 461 | 99.1k | using Type = AcStrategyType; | 462 | 99.1k | switch (strategy) { | 463 | 19.1k | case Type::IDENTITY: { | 464 | 19.1k | float dcs[4] = {}; | 465 | 19.1k | float block00 = coefficients[0]; | 466 | 19.1k | float block01 = coefficients[1]; | 467 | 19.1k | float block10 = coefficients[8]; | 468 | 19.1k | float block11 = coefficients[9]; | 469 | 19.1k | dcs[0] = block00 + block01 + block10 + block11; | 470 | 19.1k | dcs[1] = block00 + block01 - block10 - block11; | 471 | 19.1k | dcs[2] = block00 - block01 + block10 - block11; | 472 | 19.1k | dcs[3] = block00 - block01 - block10 + block11; | 473 | 57.5k | for (size_t y = 0; y < 2; y++) { | 474 | 115k | for (size_t x = 0; x < 2; x++) { | 475 | 76.7k | float block_dc = dcs[y * 2 + x]; | 476 | 76.7k | float residual_sum = 0; | 477 | 383k | for (size_t iy = 0; iy < 4; iy++) { | 478 | 1.53M | for (size_t ix = 0; ix < 4; ix++) { | 479 | 1.22M | if (ix == 0 && iy == 0) continue; | 480 | 1.15M | residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2]; | 481 | 1.15M | } | 482 | 307k | } | 483 | 76.7k | pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] = | 484 | 76.7k | block_dc - residual_sum * (1.0f / 16); | 485 | 383k | for (size_t iy = 0; iy < 4; iy++) { | 486 | 1.53M | for (size_t ix = 0; ix < 4; ix++) { | 487 | 1.22M | if (ix == 1 && iy == 1) continue; | 488 | 1.15M | pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] = | 489 | 1.15M | coefficients[(y + iy * 2) * 8 + x + ix * 2] + | 490 | 1.15M | pixels[(4 * y + 1) * pixels_stride + 4 * x + 1]; | 491 | 1.15M | } | 492 | 307k | } | 493 | 76.7k | pixels[y * 4 * pixels_stride + x * 4] = | 494 | 76.7k | coefficients[(y + 2) * 8 + x + 2] + | 495 | 76.7k | pixels[(4 * y + 1) * pixels_stride + 4 * x + 1]; | 496 | 76.7k | } | 497 | 38.3k | } | 498 | 19.1k | break; | 499 | 0 | } | 500 | 873 | case Type::DCT8X4: { | 501 | 873 | float dcs[2] = {}; | 502 | 873 | float block0 = coefficients[0]; | 503 | 873 | float block1 = coefficients[8]; | 504 | 873 | dcs[0] = block0 + block1; | 505 | 873 | dcs[1] = block0 - block1; | 506 | 2.61k | for (size_t x = 0; x < 2; x++) { | 507 | 1.74k | HWY_ALIGN float block[4 * 8]; | 508 | 1.74k | block[0] = dcs[x]; | 509 | 8.73k | for (size_t iy = 0; iy < 4; iy++) { | 510 | 62.8k | for (size_t ix = 0; ix < 8; ix++) { | 511 | 55.8k | if (ix == 0 && iy == 0) continue; | 512 | 54.1k | block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix]; | 513 | 54.1k | } | 514 | 6.98k | } | 515 | 1.74k | ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride), | 516 | 1.74k | scratch_space); | 517 | 1.74k | } | 518 | 873 | break; | 519 | 0 | } | 520 | 645 | case Type::DCT4X8: { | 521 | 645 | float dcs[2] = {}; | 522 | 645 | float block0 = coefficients[0]; | 523 | 645 | float block1 = coefficients[8]; | 524 | 645 | dcs[0] = block0 + block1; | 525 | 645 | dcs[1] = block0 - block1; | 526 | 1.93k | for (size_t y = 0; y < 2; y++) { | 527 | 1.29k | HWY_ALIGN float block[4 * 8]; | 528 | 1.29k | block[0] = dcs[y]; | 529 | 6.45k | for (size_t iy = 0; iy < 4; iy++) { | 530 | 46.4k | for (size_t ix = 0; ix < 8; ix++) { | 531 | 41.2k | if (ix == 0 && iy == 0) continue; | 532 | 39.9k | block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix]; | 533 | 39.9k | } | 534 | 5.16k | } | 535 | 1.29k | ComputeScaledIDCT<4, 8>()( | 536 | 1.29k | block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride), | 537 | 1.29k | scratch_space); | 538 | 1.29k | } | 539 | 645 | break; | 540 | 0 | } | 541 | 0 | case Type::DCT4X4: { | 542 | 0 | float dcs[4] = {}; | 543 | 0 | float block00 = coefficients[0]; | 544 | 0 | float block01 = coefficients[1]; | 545 | 0 | float block10 = coefficients[8]; | 546 | 0 | float block11 = coefficients[9]; | 547 | 0 | dcs[0] = block00 + block01 + block10 + block11; | 548 | 0 | dcs[1] = block00 + block01 - block10 - block11; | 549 | 0 | dcs[2] = block00 - block01 + block10 - block11; | 550 | 0 | dcs[3] = block00 - block01 - block10 + block11; | 551 | 0 | for (size_t y = 0; y < 2; y++) { | 552 | 0 | for (size_t x = 0; x < 2; x++) { | 553 | 0 | HWY_ALIGN float block[4 * 4]; | 554 | 0 | block[0] = dcs[y * 2 + x]; | 555 | 0 | for (size_t iy = 0; iy < 4; iy++) { | 556 | 0 | for (size_t ix = 0; ix < 4; ix++) { | 557 | 0 | if (ix == 0 && iy == 0) continue; | 558 | 0 | block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2]; | 559 | 0 | } | 560 | 0 | } | 561 | 0 | ComputeScaledIDCT<4, 4>()( | 562 | 0 | block, | 563 | 0 | DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride), | 564 | 0 | scratch_space); | 565 | 0 | } | 566 | 0 | } | 567 | 0 | break; | 568 | 0 | } | 569 | 147 | case Type::DCT2X2: { | 570 | 147 | HWY_ALIGN float coeffs[kDCTBlockSize]; | 571 | 147 | memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize); | 572 | 147 | IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs); | 573 | 147 | IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs); | 574 | 147 | IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs); | 575 | 1.32k | for (size_t y = 0; y < kBlockDim; y++) { | 576 | 10.5k | for (size_t x = 0; x < kBlockDim; x++) { | 577 | 9.40k | pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x]; | 578 | 9.40k | } | 579 | 1.17k | } | 580 | 147 | break; | 581 | 0 | } | 582 | 807 | case Type::DCT16X16: { | 583 | 807 | ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride), | 584 | 807 | scratch_space); | 585 | 807 | break; | 586 | 0 | } | 587 | 372 | case Type::DCT16X8: { | 588 | 372 | ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride), | 589 | 372 | scratch_space); | 590 | 372 | break; | 591 | 0 | } | 592 | 52.7k | case Type::DCT8X16: { | 593 | 52.7k | ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride), | 594 | 52.7k | scratch_space); | 595 | 52.7k | break; | 596 | 0 | } | 597 | 0 | case Type::DCT32X8: { | 598 | 0 | ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride), | 599 | 0 | scratch_space); | 600 | 0 | break; | 601 | 0 | } | 602 | 0 | case Type::DCT8X32: { | 603 | 0 | ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride), | 604 | 0 | scratch_space); | 605 | 0 | break; | 606 | 0 | } | 607 | 204 | case Type::DCT32X16: { | 608 | 204 | ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride), | 609 | 204 | scratch_space); | 610 | 204 | break; | 611 | 0 | } | 612 | 102 | case Type::DCT16X32: { | 613 | 102 | ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride), | 614 | 102 | scratch_space); | 615 | 102 | break; | 616 | 0 | } | 617 | 96 | case Type::DCT32X32: { | 618 | 96 | ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride), | 619 | 96 | scratch_space); | 620 | 96 | break; | 621 | 0 | } | 622 | 16.7k | case Type::DCT: { | 623 | 16.7k | ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride), | 624 | 16.7k | scratch_space); | 625 | 16.7k | break; | 626 | 0 | } | 627 | 54 | case Type::AFV0: { | 628 | 54 | AFVTransformToPixels<0>(coefficients, pixels, pixels_stride); | 629 | 54 | break; | 630 | 0 | } | 631 | 30 | case Type::AFV1: { | 632 | 30 | AFVTransformToPixels<1>(coefficients, pixels, pixels_stride); | 633 | 30 | break; | 634 | 0 | } | 635 | 495 | case Type::AFV2: { | 636 | 495 | AFVTransformToPixels<2>(coefficients, pixels, pixels_stride); | 637 | 495 | break; | 638 | 0 | } | 639 | 6.64k | case Type::AFV3: { | 640 | 6.64k | AFVTransformToPixels<3>(coefficients, pixels, pixels_stride); | 641 | 6.64k | break; | 642 | 0 | } | 643 | 0 | case Type::DCT64X32: { | 644 | 0 | ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride), | 645 | 0 | scratch_space); | 646 | 0 | break; | 647 | 0 | } | 648 | 0 | case Type::DCT32X64: { | 649 | 0 | ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride), | 650 | 0 | scratch_space); | 651 | 0 | break; | 652 | 0 | } | 653 | 0 | case Type::DCT64X64: { | 654 | 0 | ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride), | 655 | 0 | scratch_space); | 656 | 0 | break; | 657 | 0 | } | 658 | 0 | case Type::DCT128X64: { | 659 | 0 | ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride), | 660 | 0 | scratch_space); | 661 | 0 | break; | 662 | 0 | } | 663 | 0 | case Type::DCT64X128: { | 664 | 0 | ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride), | 665 | 0 | scratch_space); | 666 | 0 | break; | 667 | 0 | } | 668 | 0 | case Type::DCT128X128: { | 669 | 0 | ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride), | 670 | 0 | scratch_space); | 671 | 0 | break; | 672 | 0 | } | 673 | 0 | case Type::DCT256X128: { | 674 | 0 | ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride), | 675 | 0 | scratch_space); | 676 | 0 | break; | 677 | 0 | } | 678 | 0 | case Type::DCT128X256: { | 679 | 0 | ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride), | 680 | 0 | scratch_space); | 681 | 0 | break; | 682 | 0 | } | 683 | 0 | case Type::DCT256X256: { | 684 | 0 | ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride), | 685 | 0 | scratch_space); | 686 | 0 | break; | 687 | 0 | } | 688 | 99.1k | } | 689 | 99.1k | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SCALAR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SCALAR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*) |
690 | | |
691 | | HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy, |
692 | | const float* dc, size_t dc_stride, |
693 | | float* llf, |
694 | 99.1k | float* JXL_RESTRICT scratch) { |
695 | 99.1k | using Type = AcStrategyType; |
696 | 99.1k | HWY_ALIGN float warm_block[4 * 4]; |
697 | 99.1k | HWY_ALIGN float warm_scratch_space[4 * 4 * 4]; |
698 | 99.1k | switch (strategy) { |
699 | 372 | case Type::DCT16X8: { |
700 | 372 | ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim, |
701 | 372 | /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>( |
702 | 372 | dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space); |
703 | 372 | break; |
704 | 0 | } |
705 | 52.7k | case Type::DCT8X16: { |
706 | 52.7k | ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim, |
707 | 52.7k | /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>( |
708 | 52.7k | dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space); |
709 | 52.7k | break; |
710 | 0 | } |
711 | 807 | case Type::DCT16X16: { |
712 | 807 | ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim, |
713 | 807 | /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>( |
714 | 807 | dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space); |
715 | 807 | break; |
716 | 0 | } |
717 | 0 | case Type::DCT32X8: { |
718 | 0 | ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim, |
719 | 0 | /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>( |
720 | 0 | dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); |
721 | 0 | break; |
722 | 0 | } |
723 | 0 | case Type::DCT8X32: { |
724 | 0 | ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim, |
725 | 0 | /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>( |
726 | 0 | dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); |
727 | 0 | break; |
728 | 0 | } |
729 | 204 | case Type::DCT32X16: { |
730 | 204 | ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim, |
731 | 204 | /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>( |
732 | 204 | dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); |
733 | 204 | break; |
734 | 0 | } |
735 | 102 | case Type::DCT16X32: { |
736 | 102 | ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, |
737 | 102 | /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>( |
738 | 102 | dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); |
739 | 102 | break; |
740 | 0 | } |
741 | 96 | case Type::DCT32X32: { |
742 | 96 | ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, |
743 | 96 | /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>( |
744 | 96 | dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); |
745 | 96 | break; |
746 | 0 | } |
747 | 0 | case Type::DCT64X32: { |
748 | 0 | ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, |
749 | 0 | /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>( |
750 | 0 | dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4); |
751 | 0 | break; |
752 | 0 | } |
753 | 0 | case Type::DCT32X64: { |
754 | 0 | ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, |
755 | 0 | /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>( |
756 | 0 | dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8); |
757 | 0 | break; |
758 | 0 | } |
759 | 0 | case Type::DCT64X64: { |
760 | 0 | ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, |
761 | 0 | /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>( |
762 | 0 | dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8); |
763 | 0 | break; |
764 | 0 | } |
765 | 0 | case Type::DCT128X64: { |
766 | 0 | ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, |
767 | 0 | /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>( |
768 | 0 | dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8); |
769 | 0 | break; |
770 | 0 | } |
771 | 0 | case Type::DCT64X128: { |
772 | 0 | ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, |
773 | 0 | /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>( |
774 | 0 | dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16); |
775 | 0 | break; |
776 | 0 | } |
777 | 0 | case Type::DCT128X128: { |
778 | 0 | ReinterpretingDCT< |
779 | 0 | /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, |
780 | 0 | /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>( |
781 | 0 | dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16); |
782 | 0 | break; |
783 | 0 | } |
784 | 0 | case Type::DCT256X128: { |
785 | 0 | ReinterpretingDCT< |
786 | 0 | /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, |
787 | 0 | /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>( |
788 | 0 | dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16); |
789 | 0 | break; |
790 | 0 | } |
791 | 0 | case Type::DCT128X256: { |
792 | 0 | ReinterpretingDCT< |
793 | 0 | /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, |
794 | 0 | /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>( |
795 | 0 | dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32); |
796 | 0 | break; |
797 | 0 | } |
798 | 0 | case Type::DCT256X256: { |
799 | 0 | ReinterpretingDCT< |
800 | 0 | /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, |
801 | 0 | /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>( |
802 | 0 | dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32); |
803 | 0 | break; |
804 | 0 | } |
805 | 16.7k | case Type::DCT: |
806 | 16.9k | case Type::DCT2X2: |
807 | 16.9k | case Type::DCT4X4: |
808 | 17.5k | case Type::DCT4X8: |
809 | 18.4k | case Type::DCT8X4: |
810 | 18.4k | case Type::AFV0: |
811 | 18.5k | case Type::AFV1: |
812 | 19.0k | case Type::AFV2: |
813 | 25.6k | case Type::AFV3: |
814 | 44.8k | case Type::IDENTITY: |
815 | 44.8k | llf[0] = dc[0]; |
816 | 44.8k | break; |
817 | 99.2k | }; |
818 | 99.2k | } Unexecuted instantiation: enc_group.cc:jxl::N_SCALAR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*) dec_group.cc:jxl::N_SCALAR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*) Line | Count | Source | 694 | 99.1k | float* JXL_RESTRICT scratch) { | 695 | 99.1k | using Type = AcStrategyType; | 696 | 99.1k | HWY_ALIGN float warm_block[4 * 4]; | 697 | 99.1k | HWY_ALIGN float warm_scratch_space[4 * 4 * 4]; | 698 | 99.1k | switch (strategy) { | 699 | 372 | case Type::DCT16X8: { | 700 | 372 | ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim, | 701 | 372 | /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>( | 702 | 372 | dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space); | 703 | 372 | break; | 704 | 0 | } | 705 | 52.7k | case Type::DCT8X16: { | 706 | 52.7k | ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim, | 707 | 52.7k | /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>( | 708 | 52.7k | dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space); | 709 | 52.7k | break; | 710 | 0 | } | 711 | 807 | case Type::DCT16X16: { | 712 | 807 | ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim, | 713 | 807 | /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>( | 714 | 807 | dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space); | 715 | 807 | break; | 716 | 0 | } | 717 | 0 | case Type::DCT32X8: { | 718 | 0 | ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim, | 719 | 0 | /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>( | 720 | 0 | dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); | 721 | 0 | break; | 722 | 0 | } | 723 | 0 | case Type::DCT8X32: { | 724 | 0 | ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim, | 725 | 0 | /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>( | 726 | 0 | dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); | 727 | 0 | break; | 728 | 0 | } | 729 | 204 | case Type::DCT32X16: { | 730 | 204 | ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim, | 731 | 204 | /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>( | 732 | 204 | dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); | 733 | 204 | break; | 734 | 0 | } | 735 | 102 | case Type::DCT16X32: { | 736 | 102 | ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, | 737 | 102 | /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>( | 738 | 102 | dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); | 739 | 102 | break; | 740 | 0 | } | 741 | 96 | case Type::DCT32X32: { | 742 | 96 | ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, | 743 | 96 | /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>( | 744 | 96 | dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); | 745 | 96 | break; | 746 | 0 | } | 747 | 0 | case Type::DCT64X32: { | 748 | 0 | ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, | 749 | 0 | /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>( | 750 | 0 | dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4); | 751 | 0 | break; | 752 | 0 | } | 753 | 0 | case Type::DCT32X64: { | 754 | 0 | ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, | 755 | 0 | /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>( | 756 | 0 | dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8); | 757 | 0 | break; | 758 | 0 | } | 759 | 0 | case Type::DCT64X64: { | 760 | 0 | ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, | 761 | 0 | /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>( | 762 | 0 | dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8); | 763 | 0 | break; | 764 | 0 | } | 765 | 0 | case Type::DCT128X64: { | 766 | 0 | ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, | 767 | 0 | /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>( | 768 | 0 | dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8); | 769 | 0 | break; | 770 | 0 | } | 771 | 0 | case Type::DCT64X128: { | 772 | 0 | ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, | 773 | 0 | /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>( | 774 | 0 | dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16); | 775 | 0 | break; | 776 | 0 | } | 777 | 0 | case Type::DCT128X128: { | 778 | 0 | ReinterpretingDCT< | 779 | 0 | /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, | 780 | 0 | /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>( | 781 | 0 | dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16); | 782 | 0 | break; | 783 | 0 | } | 784 | 0 | case Type::DCT256X128: { | 785 | 0 | ReinterpretingDCT< | 786 | 0 | /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, | 787 | 0 | /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>( | 788 | 0 | dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16); | 789 | 0 | break; | 790 | 0 | } | 791 | 0 | case Type::DCT128X256: { | 792 | 0 | ReinterpretingDCT< | 793 | 0 | /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, | 794 | 0 | /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>( | 795 | 0 | dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32); | 796 | 0 | break; | 797 | 0 | } | 798 | 0 | case Type::DCT256X256: { | 799 | 0 | ReinterpretingDCT< | 800 | 0 | /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, | 801 | 0 | /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>( | 802 | 0 | dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32); | 803 | 0 | break; | 804 | 0 | } | 805 | 16.7k | case Type::DCT: | 806 | 16.9k | case Type::DCT2X2: | 807 | 16.9k | case Type::DCT4X4: | 808 | 17.5k | case Type::DCT4X8: | 809 | 18.4k | case Type::DCT8X4: | 810 | 18.4k | case Type::AFV0: | 811 | 18.5k | case Type::AFV1: | 812 | 19.0k | case Type::AFV2: | 813 | 25.6k | case Type::AFV3: | 814 | 44.8k | case Type::IDENTITY: | 815 | 44.8k | llf[0] = dc[0]; | 816 | 44.8k | break; | 817 | 99.2k | }; | 818 | 99.2k | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SCALAR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SCALAR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*) |
819 | | |
820 | | } // namespace |
821 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
822 | | } // namespace HWY_NAMESPACE |
823 | | } // namespace jxl |
824 | | HWY_AFTER_NAMESPACE(); |
825 | | |
826 | | #endif // LIB_JXL_DEC_TRANSFORMS_INL_H_ |