/src/libjxl/lib/jxl/enc_transforms-inl.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/base/compiler_specific.h" |
7 | | #include "lib/jxl/frame_dimensions.h" |
8 | | |
9 | | #if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE) |
10 | | #ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_ |
11 | | #undef LIB_JXL_ENC_TRANSFORMS_INL_H_ |
12 | | #else |
13 | | #define LIB_JXL_ENC_TRANSFORMS_INL_H_ |
14 | | #endif |
15 | | |
16 | | #include <cstddef> |
17 | | #include <cstdint> |
18 | | #include <hwy/highway.h> |
19 | | |
20 | | #include "lib/jxl/ac_strategy.h" |
21 | | #include "lib/jxl/dct-inl.h" |
22 | | #include "lib/jxl/dct_scales.h" |
23 | | |
24 | | HWY_BEFORE_NAMESPACE(); |
25 | | namespace jxl { |
26 | | |
27 | | enum class AcStrategyType : uint32_t; |
28 | | |
29 | | namespace HWY_NAMESPACE { |
30 | | namespace { |
31 | | |
32 | | constexpr size_t kMaxBlocks = 32; |
33 | | |
34 | | // Inverse of ReinterpretingDCT: copies the low-frequency coefficients from `input` (row stride `input_stride`) into the start of `scratch`, multiplying each by DCTTotalResampleScale for its row and column index (loop bounds, block stride and scale roles are exchanged when ROWS >= COLS), then runs ComputeScaledIDCT<ROWS, COLS> on that block into `output` (row stride `output_stride`); the IDCT workspace begins at scratch + kMaxBlocks * kMaxBlocks, so `scratch` must extend past that offset. |
35 | | template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS, |
36 | | size_t ROWS, size_t COLS> |
37 | | HWY_INLINE void ReinterpretingIDCT(const float* input, |
38 | | const size_t input_stride, float* output, |
39 | 195k | const size_t output_stride, float* scratch) { |
40 | 195k | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); |
41 | 195k | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); |
42 | 195k | float* block = scratch; |
43 | 195k | if (ROWS < COLS) { |
44 | 153k | for (size_t y = 0; y < LF_ROWS; y++) { |
45 | 313k | for (size_t x = 0; x < LF_COLS; x++) { |
46 | 229k | block[y * COLS + x] = input[y * input_stride + x] * |
47 | 229k | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * |
48 | 229k | DCTTotalResampleScale<DCT_COLS, COLS>(x); |
49 | 229k | } |
50 | 84.1k | } |
51 | 126k | } else { |
52 | 394k | for (size_t y = 0; y < LF_COLS; y++) { |
53 | 1.17M | for (size_t x = 0; x < LF_ROWS; x++) { |
54 | 904k | block[y * ROWS + x] = input[y * input_stride + x] * |
55 | 904k | DCTTotalResampleScale<DCT_COLS, COLS>(y) * |
56 | 904k | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); |
57 | 904k | } |
58 | 268k | } |
59 | 126k | } |
60 | | |
61 | 195k | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; |
62 | 195k | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), |
63 | 195k | scratch_space); |
64 | 195k | } Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 
4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: 
enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, 
unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void 
jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, 
unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) 
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) enc_group.cc:void jxl::N_AVX2::(anonymous 
namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 24.7k | const size_t output_stride, float* scratch) { | 40 | 24.7k | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 24.7k | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 24.7k | float* block = scratch; | 43 | 24.7k | if (ROWS < COLS) { | 44 | 0 | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 0 | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 0 | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 0 | } | 50 | 0 | } | 51 | 24.7k | } else { | 52 | 49.4k | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 74.2k | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 49.4k | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 49.4k | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 49.4k | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 49.4k | } | 58 | 24.7k | } | 59 | 24.7k | } | 60 | | | 61 | 24.7k | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 24.7k | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 24.7k | scratch_space); | 64 | 24.7k | } |
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 27.7k | const size_t output_stride, float* scratch) { | 40 | 27.7k | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 27.7k | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 27.7k | float* block = scratch; | 43 | 27.7k | if (ROWS < COLS) { | 44 | 55.4k | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 83.2k | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 55.4k | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 55.4k | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 55.4k | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 55.4k | } | 50 | 27.7k | } | 51 | 27.7k | } else { | 52 | 0 | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 0 | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 0 | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 0 | } | 58 | 0 | } | 59 | 0 | } | 60 | | | 61 | 27.7k | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 27.7k | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 27.7k | scratch_space); | 64 | 27.7k | } |
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 19.1k | const size_t output_stride, float* scratch) { | 40 | 19.1k | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 19.1k | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 19.1k | float* block = scratch; | 43 | 19.1k | if (ROWS < COLS) { | 44 | 0 | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 0 | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 0 | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 0 | } | 50 | 0 | } | 51 | 19.1k | } else { | 52 | 57.4k | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 114k | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 76.6k | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 76.6k | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 76.6k | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 76.6k | } | 58 | 38.3k | } | 59 | 19.1k | } | 60 | | | 61 | 19.1k | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 19.1k | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 19.1k | scratch_space); | 64 | 19.1k | } |
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 5.57k | const size_t output_stride, float* scratch) { | 40 | 5.57k | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 5.57k | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 5.57k | float* block = scratch; | 43 | 5.57k | if (ROWS < COLS) { | 44 | 0 | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 0 | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 0 | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 0 | } | 50 | 0 | } | 51 | 5.57k | } else { | 52 | 16.7k | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 55.7k | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 44.5k | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 44.5k | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 44.5k | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 44.5k | } | 58 | 11.1k | } | 59 | 5.57k | } | 60 | | | 61 | 5.57k | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 5.57k | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 5.57k | scratch_space); | 64 | 5.57k | } |
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 6.93k | const size_t output_stride, float* scratch) { | 40 | 6.93k | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 6.93k | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 6.93k | float* block = scratch; | 43 | 6.93k | if (ROWS < COLS) { | 44 | 20.8k | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 69.3k | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 55.4k | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 55.4k | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 55.4k | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 55.4k | } | 50 | 13.8k | } | 51 | 6.93k | } else { | 52 | 0 | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 0 | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 0 | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 0 | } | 58 | 0 | } | 59 | 0 | } | 60 | | | 61 | 6.93k | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 6.93k | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 6.93k | scratch_space); | 64 | 6.93k | } |
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 12.3k | const size_t output_stride, float* scratch) { | 40 | 12.3k | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 12.3k | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 12.3k | float* block = scratch; | 43 | 12.3k | if (ROWS < COLS) { | 44 | 0 | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 0 | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 0 | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 0 | } | 50 | 0 | } | 51 | 12.3k | } else { | 52 | 61.6k | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 246k | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 197k | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 197k | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 197k | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 197k | } | 58 | 49.3k | } | 59 | 12.3k | } | 60 | | | 61 | 12.3k | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 12.3k | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 12.3k | scratch_space); | 64 | 12.3k | } |
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 162 | const size_t output_stride, float* scratch) { | 40 | 162 | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 162 | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 162 | float* block = scratch; | 43 | 162 | if (ROWS < COLS) { | 44 | 0 | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 0 | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 0 | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 0 | } | 50 | 0 | } | 51 | 162 | } else { | 52 | 810 | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 5.83k | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 5.18k | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 5.18k | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 5.18k | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 5.18k | } | 58 | 648 | } | 59 | 162 | } | 60 | | | 61 | 162 | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 162 | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 162 | scratch_space); | 64 | 162 | } |
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 123 | const size_t output_stride, float* scratch) { | 40 | 123 | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 123 | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 123 | float* block = scratch; | 43 | 123 | if (ROWS < COLS) { | 44 | 615 | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 4.42k | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 3.93k | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 3.93k | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 3.93k | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 3.93k | } | 50 | 492 | } | 51 | 123 | } else { | 52 | 0 | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 0 | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 0 | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 0 | } | 58 | 0 | } | 59 | 0 | } | 60 | | | 61 | 123 | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 123 | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 123 | scratch_space); | 64 | 123 | } |
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 1.23k | const size_t output_stride, float* scratch) { | 40 | 1.23k | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 1.23k | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 1.23k | float* block = scratch; | 43 | 1.23k | if (ROWS < COLS) { | 44 | 0 | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 0 | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 0 | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 0 | } | 50 | 0 | } | 51 | 1.23k | } else { | 52 | 11.0k | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 88.7k | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 78.9k | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 78.9k | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 78.9k | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 78.9k | } | 58 | 9.86k | } | 59 | 1.23k | } | 60 | | | 61 | 1.23k | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 1.23k | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 1.23k | scratch_space); | 64 | 1.23k | } |
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void 
jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 
16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 
32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous 
namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 24.7k | const size_t output_stride, float* scratch) { | 40 | 24.7k | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 24.7k | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 24.7k | float* block = scratch; | 43 | 24.7k | if (ROWS < COLS) { | 44 | 0 | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 0 | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 0 | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 0 | } | 50 | 0 | } | 51 | 24.7k | } else { | 52 | 49.4k | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 74.2k | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 49.4k | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 49.4k | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 49.4k | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 49.4k | } | 58 | 24.7k | } | 59 | 24.7k | } | 60 | | | 61 | 24.7k | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 24.7k | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 24.7k | scratch_space); | 64 | 24.7k | } |
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 27.7k | const size_t output_stride, float* scratch) { | 40 | 27.7k | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 27.7k | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 27.7k | float* block = scratch; | 43 | 27.7k | if (ROWS < COLS) { | 44 | 55.4k | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 83.2k | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 55.4k | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 55.4k | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 55.4k | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 55.4k | } | 50 | 27.7k | } | 51 | 27.7k | } else { | 52 | 0 | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 0 | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 0 | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 0 | } | 58 | 0 | } | 59 | 0 | } | 60 | | | 61 | 27.7k | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 27.7k | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 27.7k | scratch_space); | 64 | 27.7k | } |
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 19.1k | const size_t output_stride, float* scratch) { | 40 | 19.1k | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 19.1k | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 19.1k | float* block = scratch; | 43 | 19.1k | if (ROWS < COLS) { | 44 | 0 | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 0 | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 0 | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 0 | } | 50 | 0 | } | 51 | 19.1k | } else { | 52 | 57.4k | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 114k | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 76.6k | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 76.6k | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 76.6k | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 76.6k | } | 58 | 38.3k | } | 59 | 19.1k | } | 60 | | | 61 | 19.1k | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 19.1k | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 19.1k | scratch_space); | 64 | 19.1k | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 5.57k | const size_t output_stride, float* scratch) { | 40 | 5.57k | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 5.57k | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 5.57k | float* block = scratch; | 43 | 5.57k | if (ROWS < COLS) { | 44 | 0 | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 0 | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 0 | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 0 | } | 50 | 0 | } | 51 | 5.57k | } else { | 52 | 16.7k | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 55.7k | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 44.5k | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 44.5k | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 44.5k | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 44.5k | } | 58 | 11.1k | } | 59 | 5.57k | } | 60 | | | 61 | 5.57k | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 5.57k | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 5.57k | scratch_space); | 64 | 5.57k | } |
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 6.93k | const size_t output_stride, float* scratch) { | 40 | 6.93k | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 6.93k | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 6.93k | float* block = scratch; | 43 | 6.93k | if (ROWS < COLS) { | 44 | 20.8k | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 69.3k | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 55.4k | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 55.4k | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 55.4k | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 55.4k | } | 50 | 13.8k | } | 51 | 6.93k | } else { | 52 | 0 | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 0 | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 0 | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 0 | } | 58 | 0 | } | 59 | 0 | } | 60 | | | 61 | 6.93k | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 6.93k | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 6.93k | scratch_space); | 64 | 6.93k | } |
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 12.3k | const size_t output_stride, float* scratch) { | 40 | 12.3k | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 12.3k | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 12.3k | float* block = scratch; | 43 | 12.3k | if (ROWS < COLS) { | 44 | 0 | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 0 | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 0 | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 0 | } | 50 | 0 | } | 51 | 12.3k | } else { | 52 | 61.6k | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 246k | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 197k | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 197k | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 197k | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 197k | } | 58 | 49.3k | } | 59 | 12.3k | } | 60 | | | 61 | 12.3k | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 12.3k | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 12.3k | scratch_space); | 64 | 12.3k | } |
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 162 | const size_t output_stride, float* scratch) { | 40 | 162 | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 162 | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 162 | float* block = scratch; | 43 | 162 | if (ROWS < COLS) { | 44 | 0 | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 0 | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 0 | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 0 | } | 50 | 0 | } | 51 | 162 | } else { | 52 | 810 | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 5.83k | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 5.18k | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 5.18k | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 5.18k | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 5.18k | } | 58 | 648 | } | 59 | 162 | } | 60 | | | 61 | 162 | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 162 | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 162 | scratch_space); | 64 | 162 | } |
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 123 | const size_t output_stride, float* scratch) { | 40 | 123 | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 123 | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 123 | float* block = scratch; | 43 | 123 | if (ROWS < COLS) { | 44 | 615 | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 4.42k | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 3.93k | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 3.93k | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 3.93k | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 3.93k | } | 50 | 492 | } | 51 | 123 | } else { | 52 | 0 | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 0 | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 0 | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 0 | } | 58 | 0 | } | 59 | 0 | } | 60 | | | 61 | 123 | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 123 | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 123 | scratch_space); | 64 | 123 | } |
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Line | Count | Source | 39 | 1.23k | const size_t output_stride, float* scratch) { | 40 | 1.23k | static_assert(ROWS <= kMaxBlocks, "Unsupported block size"); | 41 | 1.23k | static_assert(COLS <= kMaxBlocks, "Unsupported block size"); | 42 | 1.23k | float* block = scratch; | 43 | 1.23k | if (ROWS < COLS) { | 44 | 0 | for (size_t y = 0; y < LF_ROWS; y++) { | 45 | 0 | for (size_t x = 0; x < LF_COLS; x++) { | 46 | 0 | block[y * COLS + x] = input[y * input_stride + x] * | 47 | 0 | DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * | 48 | 0 | DCTTotalResampleScale<DCT_COLS, COLS>(x); | 49 | 0 | } | 50 | 0 | } | 51 | 1.23k | } else { | 52 | 11.0k | for (size_t y = 0; y < LF_COLS; y++) { | 53 | 88.7k | for (size_t x = 0; x < LF_ROWS; x++) { | 54 | 78.9k | block[y * ROWS + x] = input[y * input_stride + x] * | 55 | 78.9k | DCTTotalResampleScale<DCT_COLS, COLS>(y) * | 56 | 78.9k | DCTTotalResampleScale<DCT_ROWS, ROWS>(x); | 57 | 78.9k | } | 58 | 9.86k | } | 59 | 1.23k | } | 60 | | | 61 | 1.23k | float* scratch_space = scratch + kMaxBlocks * kMaxBlocks; | 62 | 1.23k | ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), | 63 | 1.23k | scratch_space); | 64 | 1.23k | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float 
const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 
128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous 
namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous 
namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous 
namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous 
namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous 
namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*) |
65 | | |
66 | | template <size_t S> |
67 | 4.82M | void DCT2TopBlock(const float* block, size_t stride, float* out) { |
68 | 4.82M | static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); |
69 | 4.82M | static_assert(S % 2 == 0, "S should be even"); |
70 | 4.82M | float temp[kDCTBlockSize]; |
71 | 4.82M | constexpr size_t num_2x2 = S / 2; |
72 | 16.0M | for (size_t y = 0; y < num_2x2; y++) { |
73 | 45.0M | for (size_t x = 0; x < num_2x2; x++) { |
74 | 33.7M | float c00 = block[y * 2 * stride + x * 2]; |
75 | 33.7M | float c01 = block[y * 2 * stride + x * 2 + 1]; |
76 | 33.7M | float c10 = block[(y * 2 + 1) * stride + x * 2]; |
77 | 33.7M | float c11 = block[(y * 2 + 1) * stride + x * 2 + 1]; |
78 | 33.7M | float r00 = c00 + c01 + c10 + c11; |
79 | 33.7M | float r01 = c00 + c01 - c10 - c11; |
80 | 33.7M | float r10 = c00 - c01 + c10 - c11; |
81 | 33.7M | float r11 = c00 - c01 - c10 + c11; |
82 | 33.7M | r00 *= 0.25f; |
83 | 33.7M | r01 *= 0.25f; |
84 | 33.7M | r10 *= 0.25f; |
85 | 33.7M | r11 *= 0.25f; |
86 | 33.7M | temp[y * kBlockDim + x] = r00; |
87 | 33.7M | temp[y * kBlockDim + num_2x2 + x] = r01; |
88 | 33.7M | temp[(y + num_2x2) * kBlockDim + x] = r10; |
89 | 33.7M | temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11; |
90 | 33.7M | } |
91 | 11.2M | } |
92 | 27.3M | for (size_t y = 0; y < S; y++) { |
93 | 157M | for (size_t x = 0; x < S; x++) { |
94 | 135M | out[y * kBlockDim + x] = temp[y * kBlockDim + x]; |
95 | 135M | } |
96 | 22.5M | } |
97 | 4.82M | } Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*) enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*) Line | Count | Source | 67 | 206k | void DCT2TopBlock(const float* block, size_t stride, float* out) { | 68 | 
206k | static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); | 69 | 206k | static_assert(S % 2 == 0, "S should be even"); | 70 | 206k | float temp[kDCTBlockSize]; | 71 | 206k | constexpr size_t num_2x2 = S / 2; | 72 | 1.03M | for (size_t y = 0; y < num_2x2; y++) { | 73 | 4.13M | for (size_t x = 0; x < num_2x2; x++) { | 74 | 3.30M | float c00 = block[y * 2 * stride + x * 2]; | 75 | 3.30M | float c01 = block[y * 2 * stride + x * 2 + 1]; | 76 | 3.30M | float c10 = block[(y * 2 + 1) * stride + x * 2]; | 77 | 3.30M | float c11 = block[(y * 2 + 1) * stride + x * 2 + 1]; | 78 | 3.30M | float r00 = c00 + c01 + c10 + c11; | 79 | 3.30M | float r01 = c00 + c01 - c10 - c11; | 80 | 3.30M | float r10 = c00 - c01 + c10 - c11; | 81 | 3.30M | float r11 = c00 - c01 - c10 + c11; | 82 | 3.30M | r00 *= 0.25f; | 83 | 3.30M | r01 *= 0.25f; | 84 | 3.30M | r10 *= 0.25f; | 85 | 3.30M | r11 *= 0.25f; | 86 | 3.30M | temp[y * kBlockDim + x] = r00; | 87 | 3.30M | temp[y * kBlockDim + num_2x2 + x] = r01; | 88 | 3.30M | temp[(y + num_2x2) * kBlockDim + x] = r10; | 89 | 3.30M | temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11; | 90 | 3.30M | } | 91 | 826k | } | 92 | 1.85M | for (size_t y = 0; y < S; y++) { | 93 | 14.8M | for (size_t x = 0; x < S; x++) { | 94 | 13.2M | out[y * kBlockDim + x] = temp[y * kBlockDim + x]; | 95 | 13.2M | } | 96 | 1.65M | } | 97 | 206k | } |
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*) Line | Count | Source | 67 | 206k | void DCT2TopBlock(const float* block, size_t stride, float* out) { | 68 | 206k | static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); | 69 | 206k | static_assert(S % 2 == 0, "S should be even"); | 70 | 206k | float temp[kDCTBlockSize]; | 71 | 206k | constexpr size_t num_2x2 = S / 2; | 72 | 619k | for (size_t y = 0; y < num_2x2; y++) { | 73 | 1.23M | for (size_t x = 0; x < num_2x2; x++) { | 74 | 826k | float c00 = block[y * 2 * stride + x * 2]; | 75 | 826k | float c01 = block[y * 2 * stride + x * 2 + 1]; | 76 | 826k | float c10 = block[(y * 2 + 1) * stride + x * 2]; | 77 | 826k | float c11 = block[(y * 2 + 1) * stride + x * 2 + 1]; | 78 | 826k | float r00 = c00 + c01 + c10 + c11; | 79 | 826k | float r01 = c00 + c01 - c10 - c11; | 80 | 826k | float r10 = c00 - c01 + c10 - c11; | 81 | 826k | float r11 = c00 - c01 - c10 + c11; | 82 | 826k | r00 *= 0.25f; | 83 | 826k | r01 *= 0.25f; | 84 | 826k | r10 *= 0.25f; | 85 | 826k | r11 *= 0.25f; | 86 | 826k | temp[y * kBlockDim + x] = r00; | 87 | 826k | temp[y * kBlockDim + num_2x2 + x] = r01; | 88 | 826k | temp[(y + num_2x2) * kBlockDim + x] = r10; | 89 | 826k | temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11; | 90 | 826k | } | 91 | 413k | } | 92 | 1.03M | for (size_t y = 0; y < S; y++) { | 93 | 4.13M | for (size_t x = 0; x < S; x++) { | 94 | 3.30M | out[y * kBlockDim + x] = temp[y * kBlockDim + x]; | 95 | 3.30M | } | 96 | 826k | } | 97 | 206k | } |
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*) Line | Count | Source | 67 | 206k | void DCT2TopBlock(const float* block, size_t stride, float* out) { | 68 | 206k | static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); | 69 | 206k | static_assert(S % 2 == 0, "S should be even"); | 70 | 206k | float temp[kDCTBlockSize]; | 71 | 206k | constexpr size_t num_2x2 = S / 2; | 72 | 413k | for (size_t y = 0; y < num_2x2; y++) { | 73 | 413k | for (size_t x = 0; x < num_2x2; x++) { | 74 | 206k | float c00 = block[y * 2 * stride + x * 2]; | 75 | 206k | float c01 = block[y * 2 * stride + x * 2 + 1]; | 76 | 206k | float c10 = block[(y * 2 + 1) * stride + x * 2]; | 77 | 206k | float c11 = block[(y * 2 + 1) * stride + x * 2 + 1]; | 78 | 206k | float r00 = c00 + c01 + c10 + c11; | 79 | 206k | float r01 = c00 + c01 - c10 - c11; | 80 | 206k | float r10 = c00 - c01 + c10 - c11; | 81 | 206k | float r11 = c00 - c01 - c10 + c11; | 82 | 206k | r00 *= 0.25f; | 83 | 206k | r01 *= 0.25f; | 84 | 206k | r10 *= 0.25f; | 85 | 206k | r11 *= 0.25f; | 86 | 206k | temp[y * kBlockDim + x] = r00; | 87 | 206k | temp[y * kBlockDim + num_2x2 + x] = r01; | 88 | 206k | temp[(y + num_2x2) * kBlockDim + x] = r10; | 89 | 206k | temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11; | 90 | 206k | } | 91 | 206k | } | 92 | 619k | for (size_t y = 0; y < S; y++) { | 93 | 1.23M | for (size_t x = 0; x < S; x++) { | 94 | 826k | out[y * kBlockDim + x] = temp[y * kBlockDim + x]; | 95 | 826k | } | 96 | 413k | } | 97 | 206k | } |
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*) enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*) Line | Count | Source | 67 | 206k | void DCT2TopBlock(const float* block, size_t stride, float* out) { | 68 | 206k | static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); | 69 | 206k | static_assert(S % 2 == 0, "S should be even"); | 70 | 206k | float temp[kDCTBlockSize]; | 71 | 206k | constexpr size_t num_2x2 = S / 2; | 72 | 1.03M | for (size_t y = 0; y < num_2x2; y++) { | 73 | 4.13M | for (size_t x = 0; x < num_2x2; x++) { | 74 | 3.30M | float c00 = block[y * 2 * stride + x * 2]; | 75 | 3.30M | float c01 = block[y * 2 * stride + x * 2 + 1]; | 76 | 3.30M | float c10 = block[(y * 2 + 1) * stride + x * 2]; | 77 | 3.30M | float c11 = block[(y * 2 + 1) * stride + x * 2 + 1]; | 78 | 3.30M | float r00 = c00 + c01 + c10 + c11; | 79 | 3.30M | float r01 = c00 + c01 - c10 - c11; | 80 | 3.30M | float r10 = c00 - c01 + c10 - c11; | 81 | 3.30M | float r11 = c00 - c01 - c10 + c11; | 82 | 3.30M | r00 *= 0.25f; | 83 | 3.30M | r01 *= 0.25f; | 84 | 3.30M | r10 *= 0.25f; | 85 | 3.30M | r11 *= 0.25f; | 86 | 3.30M | 
temp[y * kBlockDim + x] = r00; | 87 | 3.30M | temp[y * kBlockDim + num_2x2 + x] = r01; | 88 | 3.30M | temp[(y + num_2x2) * kBlockDim + x] = r10; | 89 | 3.30M | temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11; | 90 | 3.30M | } | 91 | 826k | } | 92 | 1.85M | for (size_t y = 0; y < S; y++) { | 93 | 14.8M | for (size_t x = 0; x < S; x++) { | 94 | 13.2M | out[y * kBlockDim + x] = temp[y * kBlockDim + x]; | 95 | 13.2M | } | 96 | 1.65M | } | 97 | 206k | } |
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*) Line | Count | Source | 67 | 206k | void DCT2TopBlock(const float* block, size_t stride, float* out) { | 68 | 206k | static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); | 69 | 206k | static_assert(S % 2 == 0, "S should be even"); | 70 | 206k | float temp[kDCTBlockSize]; | 71 | 206k | constexpr size_t num_2x2 = S / 2; | 72 | 619k | for (size_t y = 0; y < num_2x2; y++) { | 73 | 1.23M | for (size_t x = 0; x < num_2x2; x++) { | 74 | 826k | float c00 = block[y * 2 * stride + x * 2]; | 75 | 826k | float c01 = block[y * 2 * stride + x * 2 + 1]; | 76 | 826k | float c10 = block[(y * 2 + 1) * stride + x * 2]; | 77 | 826k | float c11 = block[(y * 2 + 1) * stride + x * 2 + 1]; | 78 | 826k | float r00 = c00 + c01 + c10 + c11; | 79 | 826k | float r01 = c00 + c01 - c10 - c11; | 80 | 826k | float r10 = c00 - c01 + c10 - c11; | 81 | 826k | float r11 = c00 - c01 - c10 + c11; | 82 | 826k | r00 *= 0.25f; | 83 | 826k | r01 *= 0.25f; | 84 | 826k | r10 *= 0.25f; | 85 | 826k | r11 *= 0.25f; | 86 | 826k | temp[y * kBlockDim + x] = r00; | 87 | 826k | temp[y * kBlockDim + num_2x2 + x] = r01; | 88 | 826k | temp[(y + num_2x2) * kBlockDim + x] = r10; | 89 | 826k | temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11; | 90 | 826k | } | 91 | 413k | } | 92 | 1.03M | for (size_t y = 0; y < S; y++) { | 93 | 4.13M | for (size_t x = 0; x < S; x++) { | 94 | 3.30M | out[y * kBlockDim + x] = temp[y * kBlockDim + x]; | 95 | 3.30M | } | 96 | 826k | } | 97 | 206k | } |
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*) Line | Count | Source | 67 | 206k | void DCT2TopBlock(const float* block, size_t stride, float* out) { | 68 | 206k | static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); | 69 | 206k | static_assert(S % 2 == 0, "S should be even"); | 70 | 206k | float temp[kDCTBlockSize]; | 71 | 206k | constexpr size_t num_2x2 = S / 2; | 72 | 413k | for (size_t y = 0; y < num_2x2; y++) { | 73 | 413k | for (size_t x = 0; x < num_2x2; x++) { | 74 | 206k | float c00 = block[y * 2 * stride + x * 2]; | 75 | 206k | float c01 = block[y * 2 * stride + x * 2 + 1]; | 76 | 206k | float c10 = block[(y * 2 + 1) * stride + x * 2]; | 77 | 206k | float c11 = block[(y * 2 + 1) * stride + x * 2 + 1]; | 78 | 206k | float r00 = c00 + c01 + c10 + c11; | 79 | 206k | float r01 = c00 + c01 - c10 - c11; | 80 | 206k | float r10 = c00 - c01 + c10 - c11; | 81 | 206k | float r11 = c00 - c01 - c10 + c11; | 82 | 206k | r00 *= 0.25f; | 83 | 206k | r01 *= 0.25f; | 84 | 206k | r10 *= 0.25f; | 85 | 206k | r11 *= 0.25f; | 86 | 206k | temp[y * kBlockDim + x] = r00; | 87 | 206k | temp[y * kBlockDim + num_2x2 + x] = r01; | 88 | 206k | temp[(y + num_2x2) * kBlockDim + x] = r10; | 89 | 206k | temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11; | 90 | 206k | } | 91 | 206k | } | 92 | 619k | for (size_t y = 0; y < S; y++) { | 93 | 1.23M | for (size_t x = 0; x < S; x++) { | 94 | 826k | out[y * kBlockDim + x] = temp[y * kBlockDim + x]; | 95 | 826k | } | 96 | 413k | } | 97 | 206k | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*) enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*) Line | Count | Source | 67 | 1.19M | void DCT2TopBlock(const float* block, size_t stride, float* out) { | 68 | 1.19M | static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); | 69 | 1.19M | static_assert(S % 2 == 0, "S should be even"); | 70 | 1.19M | float temp[kDCTBlockSize]; | 71 | 1.19M | constexpr size_t num_2x2 = S / 2; | 72 | 5.97M | for (size_t y = 0; y < num_2x2; y++) { | 73 | 23.8M | for (size_t x = 0; x < num_2x2; x++) { | 74 | 19.1M | float c00 = block[y * 2 * stride + x * 2]; | 75 | 19.1M | float c01 = block[y * 2 * stride + x * 2 + 1]; | 76 | 19.1M | float c10 = block[(y * 2 + 1) * stride + x * 2]; | 77 | 19.1M | float c11 = block[(y * 2 + 1) * stride + x * 2 + 1]; | 78 | 19.1M | float r00 = c00 + c01 + c10 + c11; | 79 | 19.1M | float r01 = c00 + c01 - c10 - c11; | 80 | 19.1M | float r10 = c00 - c01 + c10 - c11; | 81 | 19.1M | float r11 = c00 - c01 - c10 + c11; | 82 | 19.1M | r00 *= 0.25f; | 83 | 19.1M | r01 *= 0.25f; | 84 | 19.1M | r10 *= 0.25f; | 85 | 19.1M | r11 *= 0.25f; | 
86 | 19.1M | temp[y * kBlockDim + x] = r00; | 87 | 19.1M | temp[y * kBlockDim + num_2x2 + x] = r01; | 88 | 19.1M | temp[(y + num_2x2) * kBlockDim + x] = r10; | 89 | 19.1M | temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11; | 90 | 19.1M | } | 91 | 4.77M | } | 92 | 10.7M | for (size_t y = 0; y < S; y++) { | 93 | 85.9M | for (size_t x = 0; x < S; x++) { | 94 | 76.4M | out[y * kBlockDim + x] = temp[y * kBlockDim + x]; | 95 | 76.4M | } | 96 | 9.55M | } | 97 | 1.19M | } |
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*) Line | Count | Source | 67 | 1.19M | void DCT2TopBlock(const float* block, size_t stride, float* out) { | 68 | 1.19M | static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); | 69 | 1.19M | static_assert(S % 2 == 0, "S should be even"); | 70 | 1.19M | float temp[kDCTBlockSize]; | 71 | 1.19M | constexpr size_t num_2x2 = S / 2; | 72 | 3.58M | for (size_t y = 0; y < num_2x2; y++) { | 73 | 7.16M | for (size_t x = 0; x < num_2x2; x++) { | 74 | 4.77M | float c00 = block[y * 2 * stride + x * 2]; | 75 | 4.77M | float c01 = block[y * 2 * stride + x * 2 + 1]; | 76 | 4.77M | float c10 = block[(y * 2 + 1) * stride + x * 2]; | 77 | 4.77M | float c11 = block[(y * 2 + 1) * stride + x * 2 + 1]; | 78 | 4.77M | float r00 = c00 + c01 + c10 + c11; | 79 | 4.77M | float r01 = c00 + c01 - c10 - c11; | 80 | 4.77M | float r10 = c00 - c01 + c10 - c11; | 81 | 4.77M | float r11 = c00 - c01 - c10 + c11; | 82 | 4.77M | r00 *= 0.25f; | 83 | 4.77M | r01 *= 0.25f; | 84 | 4.77M | r10 *= 0.25f; | 85 | 4.77M | r11 *= 0.25f; | 86 | 4.77M | temp[y * kBlockDim + x] = r00; | 87 | 4.77M | temp[y * kBlockDim + num_2x2 + x] = r01; | 88 | 4.77M | temp[(y + num_2x2) * kBlockDim + x] = r10; | 89 | 4.77M | temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11; | 90 | 4.77M | } | 91 | 2.38M | } | 92 | 5.97M | for (size_t y = 0; y < S; y++) { | 93 | 23.8M | for (size_t x = 0; x < S; x++) { | 94 | 19.1M | out[y * kBlockDim + x] = temp[y * kBlockDim + x]; | 95 | 19.1M | } | 96 | 4.77M | } | 97 | 1.19M | } |
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*) Line | Count | Source | 67 | 1.19M | void DCT2TopBlock(const float* block, size_t stride, float* out) { | 68 | 1.19M | static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); | 69 | 1.19M | static_assert(S % 2 == 0, "S should be even"); | 70 | 1.19M | float temp[kDCTBlockSize]; | 71 | 1.19M | constexpr size_t num_2x2 = S / 2; | 72 | 2.38M | for (size_t y = 0; y < num_2x2; y++) { | 73 | 2.38M | for (size_t x = 0; x < num_2x2; x++) { | 74 | 1.19M | float c00 = block[y * 2 * stride + x * 2]; | 75 | 1.19M | float c01 = block[y * 2 * stride + x * 2 + 1]; | 76 | 1.19M | float c10 = block[(y * 2 + 1) * stride + x * 2]; | 77 | 1.19M | float c11 = block[(y * 2 + 1) * stride + x * 2 + 1]; | 78 | 1.19M | float r00 = c00 + c01 + c10 + c11; | 79 | 1.19M | float r01 = c00 + c01 - c10 - c11; | 80 | 1.19M | float r10 = c00 - c01 + c10 - c11; | 81 | 1.19M | float r11 = c00 - c01 - c10 + c11; | 82 | 1.19M | r00 *= 0.25f; | 83 | 1.19M | r01 *= 0.25f; | 84 | 1.19M | r10 *= 0.25f; | 85 | 1.19M | r11 *= 0.25f; | 86 | 1.19M | temp[y * kBlockDim + x] = r00; | 87 | 1.19M | temp[y * kBlockDim + num_2x2 + x] = r01; | 88 | 1.19M | temp[(y + num_2x2) * kBlockDim + x] = r10; | 89 | 1.19M | temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11; | 90 | 1.19M | } | 91 | 1.19M | } | 92 | 3.58M | for (size_t y = 0; y < S; y++) { | 93 | 7.16M | for (size_t x = 0; x < S; x++) { | 94 | 4.77M | out[y * kBlockDim + x] = temp[y * kBlockDim + x]; | 95 | 4.77M | } | 96 | 2.38M | } | 97 | 1.19M | } |
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*) |
98 | | |
99 | 4.92M | void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) { |
100 | 4.92M | HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = { |
101 | 4.92M | { |
102 | 4.92M | 0.2500000000000000, |
103 | 4.92M | 0.8769029297991420f, |
104 | 4.92M | 0.0000000000000000, |
105 | 4.92M | 0.0000000000000000, |
106 | 4.92M | 0.0000000000000000, |
107 | 4.92M | -0.4105377591765233f, |
108 | 4.92M | 0.0000000000000000, |
109 | 4.92M | 0.0000000000000000, |
110 | 4.92M | 0.0000000000000000, |
111 | 4.92M | 0.0000000000000000, |
112 | 4.92M | 0.0000000000000000, |
113 | 4.92M | 0.0000000000000000, |
114 | 4.92M | 0.0000000000000000, |
115 | 4.92M | 0.0000000000000000, |
116 | 4.92M | 0.0000000000000000, |
117 | 4.92M | 0.0000000000000000, |
118 | 4.92M | }, |
119 | 4.92M | { |
120 | 4.92M | 0.2500000000000000, |
121 | 4.92M | 0.2206518106944235f, |
122 | 4.92M | 0.0000000000000000, |
123 | 4.92M | 0.0000000000000000, |
124 | 4.92M | -0.7071067811865474f, |
125 | 4.92M | 0.6235485373547691f, |
126 | 4.92M | 0.0000000000000000, |
127 | 4.92M | 0.0000000000000000, |
128 | 4.92M | 0.0000000000000000, |
129 | 4.92M | 0.0000000000000000, |
130 | 4.92M | 0.0000000000000000, |
131 | 4.92M | 0.0000000000000000, |
132 | 4.92M | 0.0000000000000000, |
133 | 4.92M | 0.0000000000000000, |
134 | 4.92M | 0.0000000000000000, |
135 | 4.92M | 0.0000000000000000, |
136 | 4.92M | }, |
137 | 4.92M | { |
138 | 4.92M | 0.2500000000000000, |
139 | 4.92M | -0.1014005039375376f, |
140 | 4.92M | 0.4067007583026075f, |
141 | 4.92M | -0.2125574805828875f, |
142 | 4.92M | 0.0000000000000000, |
143 | 4.92M | -0.0643507165794627f, |
144 | 4.92M | -0.4517556589999482f, |
145 | 4.92M | -0.3046847507248690f, |
146 | 4.92M | 0.3017929516615495f, |
147 | 4.92M | 0.4082482904638627f, |
148 | 4.92M | 0.1747866975480809f, |
149 | 4.92M | -0.2110560104933578f, |
150 | 4.92M | -0.1426608480880726f, |
151 | 4.92M | -0.1381354035075859f, |
152 | 4.92M | -0.1743760259965107f, |
153 | 4.92M | 0.1135498731499434f, |
154 | 4.92M | }, |
155 | 4.92M | { |
156 | 4.92M | 0.2500000000000000, |
157 | 4.92M | -0.1014005039375375f, |
158 | 4.92M | 0.4444481661973445f, |
159 | 4.92M | 0.3085497062849767f, |
160 | 4.92M | 0.0000000000000000f, |
161 | 4.92M | -0.0643507165794627f, |
162 | 4.92M | 0.1585450355184006f, |
163 | 4.92M | 0.5112616136591823f, |
164 | 4.92M | 0.2579236279634118f, |
165 | 4.92M | 0.0000000000000000, |
166 | 4.92M | 0.0812611176717539f, |
167 | 4.92M | 0.1856718091610980f, |
168 | 4.92M | -0.3416446842253372f, |
169 | 4.92M | 0.3302282550303788f, |
170 | 4.92M | 0.0702790691196284f, |
171 | 4.92M | -0.0741750459581035f, |
172 | 4.92M | }, |
173 | 4.92M | { |
174 | 4.92M | 0.2500000000000000, |
175 | 4.92M | 0.2206518106944236f, |
176 | 4.92M | 0.0000000000000000, |
177 | 4.92M | 0.0000000000000000, |
178 | 4.92M | 0.7071067811865476f, |
179 | 4.92M | 0.6235485373547694f, |
180 | 4.92M | 0.0000000000000000, |
181 | 4.92M | 0.0000000000000000, |
182 | 4.92M | 0.0000000000000000, |
183 | 4.92M | 0.0000000000000000, |
184 | 4.92M | 0.0000000000000000, |
185 | 4.92M | 0.0000000000000000, |
186 | 4.92M | 0.0000000000000000, |
187 | 4.92M | 0.0000000000000000, |
188 | 4.92M | 0.0000000000000000, |
189 | 4.92M | 0.0000000000000000, |
190 | 4.92M | }, |
191 | 4.92M | { |
192 | 4.92M | 0.2500000000000000, |
193 | 4.92M | -0.1014005039375378f, |
194 | 4.92M | 0.0000000000000000, |
195 | 4.92M | 0.4706702258572536f, |
196 | 4.92M | 0.0000000000000000, |
197 | 4.92M | -0.0643507165794628f, |
198 | 4.92M | -0.0403851516082220f, |
199 | 4.92M | 0.0000000000000000, |
200 | 4.92M | 0.1627234014286620f, |
201 | 4.92M | 0.0000000000000000, |
202 | 4.92M | 0.0000000000000000, |
203 | 4.92M | 0.0000000000000000, |
204 | 4.92M | 0.7367497537172237f, |
205 | 4.92M | 0.0875511500058708f, |
206 | 4.92M | -0.2921026642334881f, |
207 | 4.92M | 0.1940289303259434f, |
208 | 4.92M | }, |
209 | 4.92M | { |
210 | 4.92M | 0.2500000000000000, |
211 | 4.92M | -0.1014005039375377f, |
212 | 4.92M | 0.1957439937204294f, |
213 | 4.92M | -0.1621205195722993f, |
214 | 4.92M | 0.0000000000000000, |
215 | 4.92M | -0.0643507165794628f, |
216 | 4.92M | 0.0074182263792424f, |
217 | 4.92M | -0.2904801297289980f, |
218 | 4.92M | 0.0952002265347504f, |
219 | 4.92M | 0.0000000000000000, |
220 | 4.92M | -0.3675398009862027f, |
221 | 4.92M | 0.4921585901373873f, |
222 | 4.92M | 0.2462710772207515f, |
223 | 4.92M | -0.0794670660590957f, |
224 | 4.92M | 0.3623817333531167f, |
225 | 4.92M | -0.4351904965232280f, |
226 | 4.92M | }, |
227 | 4.92M | { |
228 | 4.92M | 0.2500000000000000, |
229 | 4.92M | -0.1014005039375376f, |
230 | 4.92M | 0.2929100136981264f, |
231 | 4.92M | 0.0000000000000000, |
232 | 4.92M | 0.0000000000000000, |
233 | 4.92M | -0.0643507165794627f, |
234 | 4.92M | 0.3935103426921017f, |
235 | 4.92M | -0.0657870154914280f, |
236 | 4.92M | 0.0000000000000000, |
237 | 4.92M | -0.4082482904638628f, |
238 | 4.92M | -0.3078822139579090f, |
239 | 4.92M | -0.3852501370925192f, |
240 | 4.92M | -0.0857401903551931f, |
241 | 4.92M | -0.4613374887461511f, |
242 | 4.92M | 0.0000000000000000, |
243 | 4.92M | 0.2191868483885747f, |
244 | 4.92M | }, |
245 | 4.92M | { |
246 | 4.92M | 0.2500000000000000, |
247 | 4.92M | -0.1014005039375376f, |
248 | 4.92M | -0.4067007583026072f, |
249 | 4.92M | -0.2125574805828705f, |
250 | 4.92M | 0.0000000000000000, |
251 | 4.92M | -0.0643507165794627f, |
252 | 4.92M | -0.4517556589999464f, |
253 | 4.92M | 0.3046847507248840f, |
254 | 4.92M | 0.3017929516615503f, |
255 | 4.92M | -0.4082482904638635f, |
256 | 4.92M | -0.1747866975480813f, |
257 | 4.92M | 0.2110560104933581f, |
258 | 4.92M | -0.1426608480880734f, |
259 | 4.92M | -0.1381354035075829f, |
260 | 4.92M | -0.1743760259965108f, |
261 | 4.92M | 0.1135498731499426f, |
262 | 4.92M | }, |
263 | 4.92M | { |
264 | 4.92M | 0.2500000000000000, |
265 | 4.92M | -0.1014005039375377f, |
266 | 4.92M | -0.1957439937204287f, |
267 | 4.92M | -0.1621205195722833f, |
268 | 4.92M | 0.0000000000000000, |
269 | 4.92M | -0.0643507165794628f, |
270 | 4.92M | 0.0074182263792444f, |
271 | 4.92M | 0.2904801297290076f, |
272 | 4.92M | 0.0952002265347505f, |
273 | 4.92M | 0.0000000000000000, |
274 | 4.92M | 0.3675398009862011f, |
275 | 4.92M | -0.4921585901373891f, |
276 | 4.92M | 0.2462710772207514f, |
277 | 4.92M | -0.0794670660591026f, |
278 | 4.92M | 0.3623817333531165f, |
279 | 4.92M | -0.4351904965232251f, |
280 | 4.92M | }, |
281 | 4.92M | { |
282 | 4.92M | 0.2500000000000000, |
283 | 4.92M | -0.1014005039375375f, |
284 | 4.92M | 0.0000000000000000, |
285 | 4.92M | -0.4706702258572528f, |
286 | 4.92M | 0.0000000000000000, |
287 | 4.92M | -0.0643507165794627f, |
288 | 4.92M | 0.1107416575309343f, |
289 | 4.92M | 0.0000000000000000, |
290 | 4.92M | -0.1627234014286617f, |
291 | 4.92M | 0.0000000000000000, |
292 | 4.92M | 0.0000000000000000, |
293 | 4.92M | 0.0000000000000000, |
294 | 4.92M | 0.1488339922711357f, |
295 | 4.92M | 0.4972464710953509f, |
296 | 4.92M | 0.2921026642334879f, |
297 | 4.92M | 0.5550443808910661f, |
298 | 4.92M | }, |
299 | 4.92M | { |
300 | 4.92M | 0.2500000000000000, |
301 | 4.92M | -0.1014005039375377f, |
302 | 4.92M | 0.1137907446044809f, |
303 | 4.92M | -0.1464291867126764f, |
304 | 4.92M | 0.0000000000000000, |
305 | 4.92M | -0.0643507165794628f, |
306 | 4.92M | 0.0829816309488205f, |
307 | 4.92M | -0.2388977352334460f, |
308 | 4.92M | -0.3531238544981630f, |
309 | 4.92M | -0.4082482904638630f, |
310 | 4.92M | 0.4826689115059883f, |
311 | 4.92M | 0.1741941265991622f, |
312 | 4.92M | -0.0476868035022925f, |
313 | 4.92M | 0.1253805944856366f, |
314 | 4.92M | -0.4326608024727445f, |
315 | 4.92M | -0.2546827712406646f, |
316 | 4.92M | }, |
317 | 4.92M | { |
318 | 4.92M | 0.2500000000000000, |
319 | 4.92M | -0.1014005039375377f, |
320 | 4.92M | -0.4444481661973438f, |
321 | 4.92M | 0.3085497062849487f, |
322 | 4.92M | 0.0000000000000000, |
323 | 4.92M | -0.0643507165794628f, |
324 | 4.92M | 0.1585450355183970f, |
325 | 4.92M | -0.5112616136592012f, |
326 | 4.92M | 0.2579236279634129f, |
327 | 4.92M | 0.0000000000000000, |
328 | 4.92M | -0.0812611176717504f, |
329 | 4.92M | -0.1856718091610990f, |
330 | 4.92M | -0.3416446842253373f, |
331 | 4.92M | 0.3302282550303805f, |
332 | 4.92M | 0.0702790691196282f, |
333 | 4.92M | -0.0741750459581023f, |
334 | 4.92M | }, |
335 | 4.92M | { |
336 | 4.92M | 0.2500000000000000, |
337 | 4.92M | -0.1014005039375376f, |
338 | 4.92M | -0.2929100136981264f, |
339 | 4.92M | 0.0000000000000000, |
340 | 4.92M | 0.0000000000000000, |
341 | 4.92M | -0.0643507165794627f, |
342 | 4.92M | 0.3935103426921022f, |
343 | 4.92M | 0.0657870154914254f, |
344 | 4.92M | 0.0000000000000000, |
345 | 4.92M | 0.4082482904638634f, |
346 | 4.92M | 0.3078822139579031f, |
347 | 4.92M | 0.3852501370925211f, |
348 | 4.92M | -0.0857401903551927f, |
349 | 4.92M | -0.4613374887461554f, |
350 | 4.92M | 0.0000000000000000, |
351 | 4.92M | 0.2191868483885728f, |
352 | 4.92M | }, |
353 | 4.92M | { |
354 | 4.92M | 0.2500000000000000, |
355 | 4.92M | -0.1014005039375376f, |
356 | 4.92M | -0.1137907446044814f, |
357 | 4.92M | -0.1464291867126654f, |
358 | 4.92M | 0.0000000000000000, |
359 | 4.92M | -0.0643507165794627f, |
360 | 4.92M | 0.0829816309488214f, |
361 | 4.92M | 0.2388977352334547f, |
362 | 4.92M | -0.3531238544981624f, |
363 | 4.92M | 0.4082482904638630f, |
364 | 4.92M | -0.4826689115059858f, |
365 | 4.92M | -0.1741941265991621f, |
366 | 4.92M | -0.0476868035022928f, |
367 | 4.92M | 0.1253805944856431f, |
368 | 4.92M | -0.4326608024727457f, |
369 | 4.92M | -0.2546827712406641f, |
370 | 4.92M | }, |
371 | 4.92M | { |
372 | 4.92M | 0.2500000000000000, |
373 | 4.92M | -0.1014005039375374f, |
374 | 4.92M | 0.0000000000000000, |
375 | 4.92M | 0.4251149611657548f, |
376 | 4.92M | 0.0000000000000000, |
377 | 4.92M | -0.0643507165794626f, |
378 | 4.92M | -0.4517556589999480f, |
379 | 4.92M | 0.0000000000000000, |
380 | 4.92M | -0.6035859033230976f, |
381 | 4.92M | 0.0000000000000000, |
382 | 4.92M | 0.0000000000000000, |
383 | 4.92M | 0.0000000000000000, |
384 | 4.92M | -0.1426608480880724f, |
385 | 4.92M | -0.1381354035075845f, |
386 | 4.92M | 0.3487520519930227f, |
387 | 4.92M | 0.1135498731499429f, |
388 | 4.92M | }, |
389 | 4.92M | }; |
390 | | |
391 | 4.92M | const HWY_CAPPED(float, 16) d; |
392 | 14.7M | for (size_t i = 0; i < 16; i += Lanes(d)) { |
393 | 9.84M | auto scalar = Zero(d); |
394 | 167M | for (size_t j = 0; j < 16; j++) { |
395 | 157M | auto px = Set(d, pixels[j]); |
396 | 157M | auto basis = Load(d, k4x4AFVBasisTranspose[j] + i); |
397 | 157M | scalar = MulAdd(px, basis, scalar); |
398 | 157M | } |
399 | 9.84M | Store(scalar, d, coeffs + i); |
400 | 9.84M | } |
401 | 4.92M | } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*) Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*) enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*) Line | Count | Source | 99 | 73.1k | void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) { | 100 | 73.1k | HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = { | 101 | 73.1k | { | 102 | 73.1k | 0.2500000000000000, | 103 | 73.1k | 0.8769029297991420f, | 104 | 73.1k | 0.0000000000000000, | 105 | 73.1k | 0.0000000000000000, | 106 | 73.1k | 0.0000000000000000, | 107 | 73.1k | -0.4105377591765233f, | 108 | 73.1k | 0.0000000000000000, | 109 | 73.1k | 0.0000000000000000, | 110 | 73.1k | 0.0000000000000000, | 111 | 73.1k | 0.0000000000000000, | 112 | 73.1k | 0.0000000000000000, | 113 | 73.1k | 0.0000000000000000, | 114 | 73.1k | 0.0000000000000000, | 115 | 73.1k | 0.0000000000000000, | 116 | 73.1k | 0.0000000000000000, | 117 | 73.1k | 0.0000000000000000, | 118 | 73.1k | }, | 119 | 73.1k | { | 120 | 73.1k | 0.2500000000000000, | 121 | 73.1k | 0.2206518106944235f, | 122 | 73.1k | 0.0000000000000000, | 123 | 73.1k | 0.0000000000000000, | 124 | 73.1k | -0.7071067811865474f, | 125 | 73.1k | 0.6235485373547691f, | 126 | 73.1k | 0.0000000000000000, | 127 | 73.1k | 0.0000000000000000, | 128 | 73.1k | 0.0000000000000000, | 129 | 73.1k | 0.0000000000000000, | 130 | 73.1k | 0.0000000000000000, | 131 | 73.1k | 0.0000000000000000, | 132 | 73.1k | 0.0000000000000000, | 133 | 73.1k | 0.0000000000000000, | 134 | 73.1k | 0.0000000000000000, | 135 | 73.1k | 0.0000000000000000, | 136 | 73.1k 
| }, | 137 | 73.1k | { | 138 | 73.1k | 0.2500000000000000, | 139 | 73.1k | -0.1014005039375376f, | 140 | 73.1k | 0.4067007583026075f, | 141 | 73.1k | -0.2125574805828875f, | 142 | 73.1k | 0.0000000000000000, | 143 | 73.1k | -0.0643507165794627f, | 144 | 73.1k | -0.4517556589999482f, | 145 | 73.1k | -0.3046847507248690f, | 146 | 73.1k | 0.3017929516615495f, | 147 | 73.1k | 0.4082482904638627f, | 148 | 73.1k | 0.1747866975480809f, | 149 | 73.1k | -0.2110560104933578f, | 150 | 73.1k | -0.1426608480880726f, | 151 | 73.1k | -0.1381354035075859f, | 152 | 73.1k | -0.1743760259965107f, | 153 | 73.1k | 0.1135498731499434f, | 154 | 73.1k | }, | 155 | 73.1k | { | 156 | 73.1k | 0.2500000000000000, | 157 | 73.1k | -0.1014005039375375f, | 158 | 73.1k | 0.4444481661973445f, | 159 | 73.1k | 0.3085497062849767f, | 160 | 73.1k | 0.0000000000000000f, | 161 | 73.1k | -0.0643507165794627f, | 162 | 73.1k | 0.1585450355184006f, | 163 | 73.1k | 0.5112616136591823f, | 164 | 73.1k | 0.2579236279634118f, | 165 | 73.1k | 0.0000000000000000, | 166 | 73.1k | 0.0812611176717539f, | 167 | 73.1k | 0.1856718091610980f, | 168 | 73.1k | -0.3416446842253372f, | 169 | 73.1k | 0.3302282550303788f, | 170 | 73.1k | 0.0702790691196284f, | 171 | 73.1k | -0.0741750459581035f, | 172 | 73.1k | }, | 173 | 73.1k | { | 174 | 73.1k | 0.2500000000000000, | 175 | 73.1k | 0.2206518106944236f, | 176 | 73.1k | 0.0000000000000000, | 177 | 73.1k | 0.0000000000000000, | 178 | 73.1k | 0.7071067811865476f, | 179 | 73.1k | 0.6235485373547694f, | 180 | 73.1k | 0.0000000000000000, | 181 | 73.1k | 0.0000000000000000, | 182 | 73.1k | 0.0000000000000000, | 183 | 73.1k | 0.0000000000000000, | 184 | 73.1k | 0.0000000000000000, | 185 | 73.1k | 0.0000000000000000, | 186 | 73.1k | 0.0000000000000000, | 187 | 73.1k | 0.0000000000000000, | 188 | 73.1k | 0.0000000000000000, | 189 | 73.1k | 0.0000000000000000, | 190 | 73.1k | }, | 191 | 73.1k | { | 192 | 73.1k | 0.2500000000000000, | 193 | 73.1k | -0.1014005039375378f, | 194 | 73.1k | 
0.0000000000000000, | 195 | 73.1k | 0.4706702258572536f, | 196 | 73.1k | 0.0000000000000000, | 197 | 73.1k | -0.0643507165794628f, | 198 | 73.1k | -0.0403851516082220f, | 199 | 73.1k | 0.0000000000000000, | 200 | 73.1k | 0.1627234014286620f, | 201 | 73.1k | 0.0000000000000000, | 202 | 73.1k | 0.0000000000000000, | 203 | 73.1k | 0.0000000000000000, | 204 | 73.1k | 0.7367497537172237f, | 205 | 73.1k | 0.0875511500058708f, | 206 | 73.1k | -0.2921026642334881f, | 207 | 73.1k | 0.1940289303259434f, | 208 | 73.1k | }, | 209 | 73.1k | { | 210 | 73.1k | 0.2500000000000000, | 211 | 73.1k | -0.1014005039375377f, | 212 | 73.1k | 0.1957439937204294f, | 213 | 73.1k | -0.1621205195722993f, | 214 | 73.1k | 0.0000000000000000, | 215 | 73.1k | -0.0643507165794628f, | 216 | 73.1k | 0.0074182263792424f, | 217 | 73.1k | -0.2904801297289980f, | 218 | 73.1k | 0.0952002265347504f, | 219 | 73.1k | 0.0000000000000000, | 220 | 73.1k | -0.3675398009862027f, | 221 | 73.1k | 0.4921585901373873f, | 222 | 73.1k | 0.2462710772207515f, | 223 | 73.1k | -0.0794670660590957f, | 224 | 73.1k | 0.3623817333531167f, | 225 | 73.1k | -0.4351904965232280f, | 226 | 73.1k | }, | 227 | 73.1k | { | 228 | 73.1k | 0.2500000000000000, | 229 | 73.1k | -0.1014005039375376f, | 230 | 73.1k | 0.2929100136981264f, | 231 | 73.1k | 0.0000000000000000, | 232 | 73.1k | 0.0000000000000000, | 233 | 73.1k | -0.0643507165794627f, | 234 | 73.1k | 0.3935103426921017f, | 235 | 73.1k | -0.0657870154914280f, | 236 | 73.1k | 0.0000000000000000, | 237 | 73.1k | -0.4082482904638628f, | 238 | 73.1k | -0.3078822139579090f, | 239 | 73.1k | -0.3852501370925192f, | 240 | 73.1k | -0.0857401903551931f, | 241 | 73.1k | -0.4613374887461511f, | 242 | 73.1k | 0.0000000000000000, | 243 | 73.1k | 0.2191868483885747f, | 244 | 73.1k | }, | 245 | 73.1k | { | 246 | 73.1k | 0.2500000000000000, | 247 | 73.1k | -0.1014005039375376f, | 248 | 73.1k | -0.4067007583026072f, | 249 | 73.1k | -0.2125574805828705f, | 250 | 73.1k | 0.0000000000000000, | 251 | 
73.1k | -0.0643507165794627f, | 252 | 73.1k | -0.4517556589999464f, | 253 | 73.1k | 0.3046847507248840f, | 254 | 73.1k | 0.3017929516615503f, | 255 | 73.1k | -0.4082482904638635f, | 256 | 73.1k | -0.1747866975480813f, | 257 | 73.1k | 0.2110560104933581f, | 258 | 73.1k | -0.1426608480880734f, | 259 | 73.1k | -0.1381354035075829f, | 260 | 73.1k | -0.1743760259965108f, | 261 | 73.1k | 0.1135498731499426f, | 262 | 73.1k | }, | 263 | 73.1k | { | 264 | 73.1k | 0.2500000000000000, | 265 | 73.1k | -0.1014005039375377f, | 266 | 73.1k | -0.1957439937204287f, | 267 | 73.1k | -0.1621205195722833f, | 268 | 73.1k | 0.0000000000000000, | 269 | 73.1k | -0.0643507165794628f, | 270 | 73.1k | 0.0074182263792444f, | 271 | 73.1k | 0.2904801297290076f, | 272 | 73.1k | 0.0952002265347505f, | 273 | 73.1k | 0.0000000000000000, | 274 | 73.1k | 0.3675398009862011f, | 275 | 73.1k | -0.4921585901373891f, | 276 | 73.1k | 0.2462710772207514f, | 277 | 73.1k | -0.0794670660591026f, | 278 | 73.1k | 0.3623817333531165f, | 279 | 73.1k | -0.4351904965232251f, | 280 | 73.1k | }, | 281 | 73.1k | { | 282 | 73.1k | 0.2500000000000000, | 283 | 73.1k | -0.1014005039375375f, | 284 | 73.1k | 0.0000000000000000, | 285 | 73.1k | -0.4706702258572528f, | 286 | 73.1k | 0.0000000000000000, | 287 | 73.1k | -0.0643507165794627f, | 288 | 73.1k | 0.1107416575309343f, | 289 | 73.1k | 0.0000000000000000, | 290 | 73.1k | -0.1627234014286617f, | 291 | 73.1k | 0.0000000000000000, | 292 | 73.1k | 0.0000000000000000, | 293 | 73.1k | 0.0000000000000000, | 294 | 73.1k | 0.1488339922711357f, | 295 | 73.1k | 0.4972464710953509f, | 296 | 73.1k | 0.2921026642334879f, | 297 | 73.1k | 0.5550443808910661f, | 298 | 73.1k | }, | 299 | 73.1k | { | 300 | 73.1k | 0.2500000000000000, | 301 | 73.1k | -0.1014005039375377f, | 302 | 73.1k | 0.1137907446044809f, | 303 | 73.1k | -0.1464291867126764f, | 304 | 73.1k | 0.0000000000000000, | 305 | 73.1k | -0.0643507165794628f, | 306 | 73.1k | 0.0829816309488205f, | 307 | 73.1k | -0.2388977352334460f, 
| 308 | 73.1k | -0.3531238544981630f, | 309 | 73.1k | -0.4082482904638630f, | 310 | 73.1k | 0.4826689115059883f, | 311 | 73.1k | 0.1741941265991622f, | 312 | 73.1k | -0.0476868035022925f, | 313 | 73.1k | 0.1253805944856366f, | 314 | 73.1k | -0.4326608024727445f, | 315 | 73.1k | -0.2546827712406646f, | 316 | 73.1k | }, | 317 | 73.1k | { | 318 | 73.1k | 0.2500000000000000, | 319 | 73.1k | -0.1014005039375377f, | 320 | 73.1k | -0.4444481661973438f, | 321 | 73.1k | 0.3085497062849487f, | 322 | 73.1k | 0.0000000000000000, | 323 | 73.1k | -0.0643507165794628f, | 324 | 73.1k | 0.1585450355183970f, | 325 | 73.1k | -0.5112616136592012f, | 326 | 73.1k | 0.2579236279634129f, | 327 | 73.1k | 0.0000000000000000, | 328 | 73.1k | -0.0812611176717504f, | 329 | 73.1k | -0.1856718091610990f, | 330 | 73.1k | -0.3416446842253373f, | 331 | 73.1k | 0.3302282550303805f, | 332 | 73.1k | 0.0702790691196282f, | 333 | 73.1k | -0.0741750459581023f, | 334 | 73.1k | }, | 335 | 73.1k | { | 336 | 73.1k | 0.2500000000000000, | 337 | 73.1k | -0.1014005039375376f, | 338 | 73.1k | -0.2929100136981264f, | 339 | 73.1k | 0.0000000000000000, | 340 | 73.1k | 0.0000000000000000, | 341 | 73.1k | -0.0643507165794627f, | 342 | 73.1k | 0.3935103426921022f, | 343 | 73.1k | 0.0657870154914254f, | 344 | 73.1k | 0.0000000000000000, | 345 | 73.1k | 0.4082482904638634f, | 346 | 73.1k | 0.3078822139579031f, | 347 | 73.1k | 0.3852501370925211f, | 348 | 73.1k | -0.0857401903551927f, | 349 | 73.1k | -0.4613374887461554f, | 350 | 73.1k | 0.0000000000000000, | 351 | 73.1k | 0.2191868483885728f, | 352 | 73.1k | }, | 353 | 73.1k | { | 354 | 73.1k | 0.2500000000000000, | 355 | 73.1k | -0.1014005039375376f, | 356 | 73.1k | -0.1137907446044814f, | 357 | 73.1k | -0.1464291867126654f, | 358 | 73.1k | 0.0000000000000000, | 359 | 73.1k | -0.0643507165794627f, | 360 | 73.1k | 0.0829816309488214f, | 361 | 73.1k | 0.2388977352334547f, | 362 | 73.1k | -0.3531238544981624f, | 363 | 73.1k | 0.4082482904638630f, | 364 | 73.1k | 
-0.4826689115059858f, | 365 | 73.1k | -0.1741941265991621f, | 366 | 73.1k | -0.0476868035022928f, | 367 | 73.1k | 0.1253805944856431f, | 368 | 73.1k | -0.4326608024727457f, | 369 | 73.1k | -0.2546827712406641f, | 370 | 73.1k | }, | 371 | 73.1k | { | 372 | 73.1k | 0.2500000000000000, | 373 | 73.1k | -0.1014005039375374f, | 374 | 73.1k | 0.0000000000000000, | 375 | 73.1k | 0.4251149611657548f, | 376 | 73.1k | 0.0000000000000000, | 377 | 73.1k | -0.0643507165794626f, | 378 | 73.1k | -0.4517556589999480f, | 379 | 73.1k | 0.0000000000000000, | 380 | 73.1k | -0.6035859033230976f, | 381 | 73.1k | 0.0000000000000000, | 382 | 73.1k | 0.0000000000000000, | 383 | 73.1k | 0.0000000000000000, | 384 | 73.1k | -0.1426608480880724f, | 385 | 73.1k | -0.1381354035075845f, | 386 | 73.1k | 0.3487520519930227f, | 387 | 73.1k | 0.1135498731499429f, | 388 | 73.1k | }, | 389 | 73.1k | }; | 390 | | | 391 | 73.1k | const HWY_CAPPED(float, 16) d; | 392 | 219k | for (size_t i = 0; i < 16; i += Lanes(d)) { | 393 | 146k | auto scalar = Zero(d); | 394 | 2.48M | for (size_t j = 0; j < 16; j++) { | 395 | 2.33M | auto px = Set(d, pixels[j]); | 396 | 2.33M | auto basis = Load(d, k4x4AFVBasisTranspose[j] + i); | 397 | 2.33M | scalar = MulAdd(px, basis, scalar); | 398 | 2.33M | } | 399 | 146k | Store(scalar, d, coeffs + i); | 400 | 146k | } | 401 | 73.1k | } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*) enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*) Line | Count | Source | 99 | 73.1k | void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) { | 100 | 73.1k | HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = { | 101 | 73.1k | { | 102 | 73.1k | 0.2500000000000000, | 103 | 73.1k | 0.8769029297991420f, | 104 | 73.1k | 0.0000000000000000, | 105 | 73.1k | 0.0000000000000000, | 106 | 73.1k | 0.0000000000000000, | 107 | 73.1k | -0.4105377591765233f, | 108 | 73.1k | 0.0000000000000000, | 109 | 73.1k | 0.0000000000000000, | 110 | 73.1k | 0.0000000000000000, | 111 | 73.1k | 0.0000000000000000, | 112 | 73.1k | 0.0000000000000000, | 113 | 73.1k | 0.0000000000000000, | 114 | 73.1k | 0.0000000000000000, | 115 | 73.1k | 0.0000000000000000, | 116 | 73.1k | 0.0000000000000000, | 117 | 73.1k | 0.0000000000000000, | 118 | 73.1k | }, | 119 | 73.1k | { | 120 | 73.1k | 0.2500000000000000, | 121 | 73.1k | 0.2206518106944235f, | 122 | 73.1k | 0.0000000000000000, | 123 | 73.1k | 0.0000000000000000, | 124 | 73.1k | -0.7071067811865474f, | 125 | 73.1k | 0.6235485373547691f, | 126 | 73.1k | 0.0000000000000000, | 127 | 73.1k | 0.0000000000000000, | 128 | 73.1k | 0.0000000000000000, | 129 | 73.1k | 0.0000000000000000, | 130 | 73.1k | 0.0000000000000000, | 131 | 73.1k | 0.0000000000000000, | 132 | 73.1k | 0.0000000000000000, | 133 | 73.1k | 0.0000000000000000, | 134 | 73.1k | 0.0000000000000000, | 135 | 73.1k | 0.0000000000000000, | 136 | 73.1k | }, | 137 | 73.1k | { | 138 | 73.1k | 0.2500000000000000, | 139 | 73.1k | -0.1014005039375376f, | 140 | 73.1k | 0.4067007583026075f, | 141 | 73.1k | -0.2125574805828875f, | 142 | 73.1k | 0.0000000000000000, | 143 | 73.1k | -0.0643507165794627f, | 144 | 
73.1k | -0.4517556589999482f, | 145 | 73.1k | -0.3046847507248690f, | 146 | 73.1k | 0.3017929516615495f, | 147 | 73.1k | 0.4082482904638627f, | 148 | 73.1k | 0.1747866975480809f, | 149 | 73.1k | -0.2110560104933578f, | 150 | 73.1k | -0.1426608480880726f, | 151 | 73.1k | -0.1381354035075859f, | 152 | 73.1k | -0.1743760259965107f, | 153 | 73.1k | 0.1135498731499434f, | 154 | 73.1k | }, | 155 | 73.1k | { | 156 | 73.1k | 0.2500000000000000, | 157 | 73.1k | -0.1014005039375375f, | 158 | 73.1k | 0.4444481661973445f, | 159 | 73.1k | 0.3085497062849767f, | 160 | 73.1k | 0.0000000000000000f, | 161 | 73.1k | -0.0643507165794627f, | 162 | 73.1k | 0.1585450355184006f, | 163 | 73.1k | 0.5112616136591823f, | 164 | 73.1k | 0.2579236279634118f, | 165 | 73.1k | 0.0000000000000000, | 166 | 73.1k | 0.0812611176717539f, | 167 | 73.1k | 0.1856718091610980f, | 168 | 73.1k | -0.3416446842253372f, | 169 | 73.1k | 0.3302282550303788f, | 170 | 73.1k | 0.0702790691196284f, | 171 | 73.1k | -0.0741750459581035f, | 172 | 73.1k | }, | 173 | 73.1k | { | 174 | 73.1k | 0.2500000000000000, | 175 | 73.1k | 0.2206518106944236f, | 176 | 73.1k | 0.0000000000000000, | 177 | 73.1k | 0.0000000000000000, | 178 | 73.1k | 0.7071067811865476f, | 179 | 73.1k | 0.6235485373547694f, | 180 | 73.1k | 0.0000000000000000, | 181 | 73.1k | 0.0000000000000000, | 182 | 73.1k | 0.0000000000000000, | 183 | 73.1k | 0.0000000000000000, | 184 | 73.1k | 0.0000000000000000, | 185 | 73.1k | 0.0000000000000000, | 186 | 73.1k | 0.0000000000000000, | 187 | 73.1k | 0.0000000000000000, | 188 | 73.1k | 0.0000000000000000, | 189 | 73.1k | 0.0000000000000000, | 190 | 73.1k | }, | 191 | 73.1k | { | 192 | 73.1k | 0.2500000000000000, | 193 | 73.1k | -0.1014005039375378f, | 194 | 73.1k | 0.0000000000000000, | 195 | 73.1k | 0.4706702258572536f, | 196 | 73.1k | 0.0000000000000000, | 197 | 73.1k | -0.0643507165794628f, | 198 | 73.1k | -0.0403851516082220f, | 199 | 73.1k | 0.0000000000000000, | 200 | 73.1k | 0.1627234014286620f, | 201 | 73.1k | 
0.0000000000000000, | 202 | 73.1k | 0.0000000000000000, | 203 | 73.1k | 0.0000000000000000, | 204 | 73.1k | 0.7367497537172237f, | 205 | 73.1k | 0.0875511500058708f, | 206 | 73.1k | -0.2921026642334881f, | 207 | 73.1k | 0.1940289303259434f, | 208 | 73.1k | }, | 209 | 73.1k | { | 210 | 73.1k | 0.2500000000000000, | 211 | 73.1k | -0.1014005039375377f, | 212 | 73.1k | 0.1957439937204294f, | 213 | 73.1k | -0.1621205195722993f, | 214 | 73.1k | 0.0000000000000000, | 215 | 73.1k | -0.0643507165794628f, | 216 | 73.1k | 0.0074182263792424f, | 217 | 73.1k | -0.2904801297289980f, | 218 | 73.1k | 0.0952002265347504f, | 219 | 73.1k | 0.0000000000000000, | 220 | 73.1k | -0.3675398009862027f, | 221 | 73.1k | 0.4921585901373873f, | 222 | 73.1k | 0.2462710772207515f, | 223 | 73.1k | -0.0794670660590957f, | 224 | 73.1k | 0.3623817333531167f, | 225 | 73.1k | -0.4351904965232280f, | 226 | 73.1k | }, | 227 | 73.1k | { | 228 | 73.1k | 0.2500000000000000, | 229 | 73.1k | -0.1014005039375376f, | 230 | 73.1k | 0.2929100136981264f, | 231 | 73.1k | 0.0000000000000000, | 232 | 73.1k | 0.0000000000000000, | 233 | 73.1k | -0.0643507165794627f, | 234 | 73.1k | 0.3935103426921017f, | 235 | 73.1k | -0.0657870154914280f, | 236 | 73.1k | 0.0000000000000000, | 237 | 73.1k | -0.4082482904638628f, | 238 | 73.1k | -0.3078822139579090f, | 239 | 73.1k | -0.3852501370925192f, | 240 | 73.1k | -0.0857401903551931f, | 241 | 73.1k | -0.4613374887461511f, | 242 | 73.1k | 0.0000000000000000, | 243 | 73.1k | 0.2191868483885747f, | 244 | 73.1k | }, | 245 | 73.1k | { | 246 | 73.1k | 0.2500000000000000, | 247 | 73.1k | -0.1014005039375376f, | 248 | 73.1k | -0.4067007583026072f, | 249 | 73.1k | -0.2125574805828705f, | 250 | 73.1k | 0.0000000000000000, | 251 | 73.1k | -0.0643507165794627f, | 252 | 73.1k | -0.4517556589999464f, | 253 | 73.1k | 0.3046847507248840f, | 254 | 73.1k | 0.3017929516615503f, | 255 | 73.1k | -0.4082482904638635f, | 256 | 73.1k | -0.1747866975480813f, | 257 | 73.1k | 0.2110560104933581f, | 258 | 
73.1k | -0.1426608480880734f, | 259 | 73.1k | -0.1381354035075829f, | 260 | 73.1k | -0.1743760259965108f, | 261 | 73.1k | 0.1135498731499426f, | 262 | 73.1k | }, | 263 | 73.1k | { | 264 | 73.1k | 0.2500000000000000, | 265 | 73.1k | -0.1014005039375377f, | 266 | 73.1k | -0.1957439937204287f, | 267 | 73.1k | -0.1621205195722833f, | 268 | 73.1k | 0.0000000000000000, | 269 | 73.1k | -0.0643507165794628f, | 270 | 73.1k | 0.0074182263792444f, | 271 | 73.1k | 0.2904801297290076f, | 272 | 73.1k | 0.0952002265347505f, | 273 | 73.1k | 0.0000000000000000, | 274 | 73.1k | 0.3675398009862011f, | 275 | 73.1k | -0.4921585901373891f, | 276 | 73.1k | 0.2462710772207514f, | 277 | 73.1k | -0.0794670660591026f, | 278 | 73.1k | 0.3623817333531165f, | 279 | 73.1k | -0.4351904965232251f, | 280 | 73.1k | }, | 281 | 73.1k | { | 282 | 73.1k | 0.2500000000000000, | 283 | 73.1k | -0.1014005039375375f, | 284 | 73.1k | 0.0000000000000000, | 285 | 73.1k | -0.4706702258572528f, | 286 | 73.1k | 0.0000000000000000, | 287 | 73.1k | -0.0643507165794627f, | 288 | 73.1k | 0.1107416575309343f, | 289 | 73.1k | 0.0000000000000000, | 290 | 73.1k | -0.1627234014286617f, | 291 | 73.1k | 0.0000000000000000, | 292 | 73.1k | 0.0000000000000000, | 293 | 73.1k | 0.0000000000000000, | 294 | 73.1k | 0.1488339922711357f, | 295 | 73.1k | 0.4972464710953509f, | 296 | 73.1k | 0.2921026642334879f, | 297 | 73.1k | 0.5550443808910661f, | 298 | 73.1k | }, | 299 | 73.1k | { | 300 | 73.1k | 0.2500000000000000, | 301 | 73.1k | -0.1014005039375377f, | 302 | 73.1k | 0.1137907446044809f, | 303 | 73.1k | -0.1464291867126764f, | 304 | 73.1k | 0.0000000000000000, | 305 | 73.1k | -0.0643507165794628f, | 306 | 73.1k | 0.0829816309488205f, | 307 | 73.1k | -0.2388977352334460f, | 308 | 73.1k | -0.3531238544981630f, | 309 | 73.1k | -0.4082482904638630f, | 310 | 73.1k | 0.4826689115059883f, | 311 | 73.1k | 0.1741941265991622f, | 312 | 73.1k | -0.0476868035022925f, | 313 | 73.1k | 0.1253805944856366f, | 314 | 73.1k | -0.4326608024727445f, 
| 315 | 73.1k | -0.2546827712406646f, | 316 | 73.1k | }, | 317 | 73.1k | { | 318 | 73.1k | 0.2500000000000000, | 319 | 73.1k | -0.1014005039375377f, | 320 | 73.1k | -0.4444481661973438f, | 321 | 73.1k | 0.3085497062849487f, | 322 | 73.1k | 0.0000000000000000, | 323 | 73.1k | -0.0643507165794628f, | 324 | 73.1k | 0.1585450355183970f, | 325 | 73.1k | -0.5112616136592012f, | 326 | 73.1k | 0.2579236279634129f, | 327 | 73.1k | 0.0000000000000000, | 328 | 73.1k | -0.0812611176717504f, | 329 | 73.1k | -0.1856718091610990f, | 330 | 73.1k | -0.3416446842253373f, | 331 | 73.1k | 0.3302282550303805f, | 332 | 73.1k | 0.0702790691196282f, | 333 | 73.1k | -0.0741750459581023f, | 334 | 73.1k | }, | 335 | 73.1k | { | 336 | 73.1k | 0.2500000000000000, | 337 | 73.1k | -0.1014005039375376f, | 338 | 73.1k | -0.2929100136981264f, | 339 | 73.1k | 0.0000000000000000, | 340 | 73.1k | 0.0000000000000000, | 341 | 73.1k | -0.0643507165794627f, | 342 | 73.1k | 0.3935103426921022f, | 343 | 73.1k | 0.0657870154914254f, | 344 | 73.1k | 0.0000000000000000, | 345 | 73.1k | 0.4082482904638634f, | 346 | 73.1k | 0.3078822139579031f, | 347 | 73.1k | 0.3852501370925211f, | 348 | 73.1k | -0.0857401903551927f, | 349 | 73.1k | -0.4613374887461554f, | 350 | 73.1k | 0.0000000000000000, | 351 | 73.1k | 0.2191868483885728f, | 352 | 73.1k | }, | 353 | 73.1k | { | 354 | 73.1k | 0.2500000000000000, | 355 | 73.1k | -0.1014005039375376f, | 356 | 73.1k | -0.1137907446044814f, | 357 | 73.1k | -0.1464291867126654f, | 358 | 73.1k | 0.0000000000000000, | 359 | 73.1k | -0.0643507165794627f, | 360 | 73.1k | 0.0829816309488214f, | 361 | 73.1k | 0.2388977352334547f, | 362 | 73.1k | -0.3531238544981624f, | 363 | 73.1k | 0.4082482904638630f, | 364 | 73.1k | -0.4826689115059858f, | 365 | 73.1k | -0.1741941265991621f, | 366 | 73.1k | -0.0476868035022928f, | 367 | 73.1k | 0.1253805944856431f, | 368 | 73.1k | -0.4326608024727457f, | 369 | 73.1k | -0.2546827712406641f, | 370 | 73.1k | }, | 371 | 73.1k | { | 372 | 73.1k | 
0.2500000000000000, | 373 | 73.1k | -0.1014005039375374f, | 374 | 73.1k | 0.0000000000000000, | 375 | 73.1k | 0.4251149611657548f, | 376 | 73.1k | 0.0000000000000000, | 377 | 73.1k | -0.0643507165794626f, | 378 | 73.1k | -0.4517556589999480f, | 379 | 73.1k | 0.0000000000000000, | 380 | 73.1k | -0.6035859033230976f, | 381 | 73.1k | 0.0000000000000000, | 382 | 73.1k | 0.0000000000000000, | 383 | 73.1k | 0.0000000000000000, | 384 | 73.1k | -0.1426608480880724f, | 385 | 73.1k | -0.1381354035075845f, | 386 | 73.1k | 0.3487520519930227f, | 387 | 73.1k | 0.1135498731499429f, | 388 | 73.1k | }, | 389 | 73.1k | }; | 390 | | | 391 | 73.1k | const HWY_CAPPED(float, 16) d; | 392 | 219k | for (size_t i = 0; i < 16; i += Lanes(d)) { | 393 | 146k | auto scalar = Zero(d); | 394 | 2.48M | for (size_t j = 0; j < 16; j++) { | 395 | 2.33M | auto px = Set(d, pixels[j]); | 396 | 2.33M | auto basis = Load(d, k4x4AFVBasisTranspose[j] + i); | 397 | 2.33M | scalar = MulAdd(px, basis, scalar); | 398 | 2.33M | } | 399 | 146k | Store(scalar, d, coeffs + i); | 400 | 146k | } | 401 | 73.1k | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*) Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*) enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*) Line | Count | Source | 99 | 4.77M | void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) { | 100 | 4.77M | HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = { | 101 | 4.77M | { | 102 | 4.77M | 0.2500000000000000, | 103 | 4.77M | 0.8769029297991420f, | 104 | 4.77M | 0.0000000000000000, | 105 | 4.77M | 0.0000000000000000, | 106 | 4.77M | 0.0000000000000000, | 107 | 4.77M | -0.4105377591765233f, | 108 | 4.77M | 0.0000000000000000, | 109 | 4.77M | 0.0000000000000000, | 110 | 4.77M | 0.0000000000000000, | 111 | 4.77M | 0.0000000000000000, | 112 | 4.77M | 0.0000000000000000, | 113 | 4.77M | 0.0000000000000000, | 114 | 4.77M | 0.0000000000000000, | 115 | 4.77M | 0.0000000000000000, | 116 | 4.77M | 0.0000000000000000, | 117 | 4.77M | 0.0000000000000000, | 118 | 4.77M | }, | 119 | 4.77M | { | 120 | 4.77M | 0.2500000000000000, | 121 | 4.77M | 0.2206518106944235f, | 122 | 4.77M | 0.0000000000000000, | 123 | 4.77M | 0.0000000000000000, | 124 | 4.77M | -0.7071067811865474f, | 125 | 4.77M | 0.6235485373547691f, | 126 | 4.77M | 0.0000000000000000, | 127 | 4.77M | 0.0000000000000000, | 128 | 4.77M | 0.0000000000000000, | 129 | 4.77M | 0.0000000000000000, | 130 | 4.77M | 0.0000000000000000, | 131 | 4.77M | 0.0000000000000000, | 132 | 4.77M | 0.0000000000000000, | 133 | 4.77M | 0.0000000000000000, | 134 | 4.77M | 0.0000000000000000, | 135 | 4.77M | 0.0000000000000000, | 136 | 4.77M | }, | 137 | 4.77M | { | 138 | 4.77M | 0.2500000000000000, | 139 | 4.77M | -0.1014005039375376f, | 140 | 4.77M | 0.4067007583026075f, | 141 | 4.77M | -0.2125574805828875f, | 142 | 4.77M | 0.0000000000000000, | 143 | 4.77M | -0.0643507165794627f, | 144 | 
4.77M | -0.4517556589999482f, | 145 | 4.77M | -0.3046847507248690f, | 146 | 4.77M | 0.3017929516615495f, | 147 | 4.77M | 0.4082482904638627f, | 148 | 4.77M | 0.1747866975480809f, | 149 | 4.77M | -0.2110560104933578f, | 150 | 4.77M | -0.1426608480880726f, | 151 | 4.77M | -0.1381354035075859f, | 152 | 4.77M | -0.1743760259965107f, | 153 | 4.77M | 0.1135498731499434f, | 154 | 4.77M | }, | 155 | 4.77M | { | 156 | 4.77M | 0.2500000000000000, | 157 | 4.77M | -0.1014005039375375f, | 158 | 4.77M | 0.4444481661973445f, | 159 | 4.77M | 0.3085497062849767f, | 160 | 4.77M | 0.0000000000000000f, | 161 | 4.77M | -0.0643507165794627f, | 162 | 4.77M | 0.1585450355184006f, | 163 | 4.77M | 0.5112616136591823f, | 164 | 4.77M | 0.2579236279634118f, | 165 | 4.77M | 0.0000000000000000, | 166 | 4.77M | 0.0812611176717539f, | 167 | 4.77M | 0.1856718091610980f, | 168 | 4.77M | -0.3416446842253372f, | 169 | 4.77M | 0.3302282550303788f, | 170 | 4.77M | 0.0702790691196284f, | 171 | 4.77M | -0.0741750459581035f, | 172 | 4.77M | }, | 173 | 4.77M | { | 174 | 4.77M | 0.2500000000000000, | 175 | 4.77M | 0.2206518106944236f, | 176 | 4.77M | 0.0000000000000000, | 177 | 4.77M | 0.0000000000000000, | 178 | 4.77M | 0.7071067811865476f, | 179 | 4.77M | 0.6235485373547694f, | 180 | 4.77M | 0.0000000000000000, | 181 | 4.77M | 0.0000000000000000, | 182 | 4.77M | 0.0000000000000000, | 183 | 4.77M | 0.0000000000000000, | 184 | 4.77M | 0.0000000000000000, | 185 | 4.77M | 0.0000000000000000, | 186 | 4.77M | 0.0000000000000000, | 187 | 4.77M | 0.0000000000000000, | 188 | 4.77M | 0.0000000000000000, | 189 | 4.77M | 0.0000000000000000, | 190 | 4.77M | }, | 191 | 4.77M | { | 192 | 4.77M | 0.2500000000000000, | 193 | 4.77M | -0.1014005039375378f, | 194 | 4.77M | 0.0000000000000000, | 195 | 4.77M | 0.4706702258572536f, | 196 | 4.77M | 0.0000000000000000, | 197 | 4.77M | -0.0643507165794628f, | 198 | 4.77M | -0.0403851516082220f, | 199 | 4.77M | 0.0000000000000000, | 200 | 4.77M | 0.1627234014286620f, | 201 | 4.77M | 
0.0000000000000000, | 202 | 4.77M | 0.0000000000000000, | 203 | 4.77M | 0.0000000000000000, | 204 | 4.77M | 0.7367497537172237f, | 205 | 4.77M | 0.0875511500058708f, | 206 | 4.77M | -0.2921026642334881f, | 207 | 4.77M | 0.1940289303259434f, | 208 | 4.77M | }, | 209 | 4.77M | { | 210 | 4.77M | 0.2500000000000000, | 211 | 4.77M | -0.1014005039375377f, | 212 | 4.77M | 0.1957439937204294f, | 213 | 4.77M | -0.1621205195722993f, | 214 | 4.77M | 0.0000000000000000, | 215 | 4.77M | -0.0643507165794628f, | 216 | 4.77M | 0.0074182263792424f, | 217 | 4.77M | -0.2904801297289980f, | 218 | 4.77M | 0.0952002265347504f, | 219 | 4.77M | 0.0000000000000000, | 220 | 4.77M | -0.3675398009862027f, | 221 | 4.77M | 0.4921585901373873f, | 222 | 4.77M | 0.2462710772207515f, | 223 | 4.77M | -0.0794670660590957f, | 224 | 4.77M | 0.3623817333531167f, | 225 | 4.77M | -0.4351904965232280f, | 226 | 4.77M | }, | 227 | 4.77M | { | 228 | 4.77M | 0.2500000000000000, | 229 | 4.77M | -0.1014005039375376f, | 230 | 4.77M | 0.2929100136981264f, | 231 | 4.77M | 0.0000000000000000, | 232 | 4.77M | 0.0000000000000000, | 233 | 4.77M | -0.0643507165794627f, | 234 | 4.77M | 0.3935103426921017f, | 235 | 4.77M | -0.0657870154914280f, | 236 | 4.77M | 0.0000000000000000, | 237 | 4.77M | -0.4082482904638628f, | 238 | 4.77M | -0.3078822139579090f, | 239 | 4.77M | -0.3852501370925192f, | 240 | 4.77M | -0.0857401903551931f, | 241 | 4.77M | -0.4613374887461511f, | 242 | 4.77M | 0.0000000000000000, | 243 | 4.77M | 0.2191868483885747f, | 244 | 4.77M | }, | 245 | 4.77M | { | 246 | 4.77M | 0.2500000000000000, | 247 | 4.77M | -0.1014005039375376f, | 248 | 4.77M | -0.4067007583026072f, | 249 | 4.77M | -0.2125574805828705f, | 250 | 4.77M | 0.0000000000000000, | 251 | 4.77M | -0.0643507165794627f, | 252 | 4.77M | -0.4517556589999464f, | 253 | 4.77M | 0.3046847507248840f, | 254 | 4.77M | 0.3017929516615503f, | 255 | 4.77M | -0.4082482904638635f, | 256 | 4.77M | -0.1747866975480813f, | 257 | 4.77M | 0.2110560104933581f, | 258 | 
4.77M | -0.1426608480880734f, | 259 | 4.77M | -0.1381354035075829f, | 260 | 4.77M | -0.1743760259965108f, | 261 | 4.77M | 0.1135498731499426f, | 262 | 4.77M | }, | 263 | 4.77M | { | 264 | 4.77M | 0.2500000000000000, | 265 | 4.77M | -0.1014005039375377f, | 266 | 4.77M | -0.1957439937204287f, | 267 | 4.77M | -0.1621205195722833f, | 268 | 4.77M | 0.0000000000000000, | 269 | 4.77M | -0.0643507165794628f, | 270 | 4.77M | 0.0074182263792444f, | 271 | 4.77M | 0.2904801297290076f, | 272 | 4.77M | 0.0952002265347505f, | 273 | 4.77M | 0.0000000000000000, | 274 | 4.77M | 0.3675398009862011f, | 275 | 4.77M | -0.4921585901373891f, | 276 | 4.77M | 0.2462710772207514f, | 277 | 4.77M | -0.0794670660591026f, | 278 | 4.77M | 0.3623817333531165f, | 279 | 4.77M | -0.4351904965232251f, | 280 | 4.77M | }, | 281 | 4.77M | { | 282 | 4.77M | 0.2500000000000000, | 283 | 4.77M | -0.1014005039375375f, | 284 | 4.77M | 0.0000000000000000, | 285 | 4.77M | -0.4706702258572528f, | 286 | 4.77M | 0.0000000000000000, | 287 | 4.77M | -0.0643507165794627f, | 288 | 4.77M | 0.1107416575309343f, | 289 | 4.77M | 0.0000000000000000, | 290 | 4.77M | -0.1627234014286617f, | 291 | 4.77M | 0.0000000000000000, | 292 | 4.77M | 0.0000000000000000, | 293 | 4.77M | 0.0000000000000000, | 294 | 4.77M | 0.1488339922711357f, | 295 | 4.77M | 0.4972464710953509f, | 296 | 4.77M | 0.2921026642334879f, | 297 | 4.77M | 0.5550443808910661f, | 298 | 4.77M | }, | 299 | 4.77M | { | 300 | 4.77M | 0.2500000000000000, | 301 | 4.77M | -0.1014005039375377f, | 302 | 4.77M | 0.1137907446044809f, | 303 | 4.77M | -0.1464291867126764f, | 304 | 4.77M | 0.0000000000000000, | 305 | 4.77M | -0.0643507165794628f, | 306 | 4.77M | 0.0829816309488205f, | 307 | 4.77M | -0.2388977352334460f, | 308 | 4.77M | -0.3531238544981630f, | 309 | 4.77M | -0.4082482904638630f, | 310 | 4.77M | 0.4826689115059883f, | 311 | 4.77M | 0.1741941265991622f, | 312 | 4.77M | -0.0476868035022925f, | 313 | 4.77M | 0.1253805944856366f, | 314 | 4.77M | -0.4326608024727445f, 
| 315 | 4.77M | -0.2546827712406646f, | 316 | 4.77M | }, | 317 | 4.77M | { | 318 | 4.77M | 0.2500000000000000, | 319 | 4.77M | -0.1014005039375377f, | 320 | 4.77M | -0.4444481661973438f, | 321 | 4.77M | 0.3085497062849487f, | 322 | 4.77M | 0.0000000000000000, | 323 | 4.77M | -0.0643507165794628f, | 324 | 4.77M | 0.1585450355183970f, | 325 | 4.77M | -0.5112616136592012f, | 326 | 4.77M | 0.2579236279634129f, | 327 | 4.77M | 0.0000000000000000, | 328 | 4.77M | -0.0812611176717504f, | 329 | 4.77M | -0.1856718091610990f, | 330 | 4.77M | -0.3416446842253373f, | 331 | 4.77M | 0.3302282550303805f, | 332 | 4.77M | 0.0702790691196282f, | 333 | 4.77M | -0.0741750459581023f, | 334 | 4.77M | }, | 335 | 4.77M | { | 336 | 4.77M | 0.2500000000000000, | 337 | 4.77M | -0.1014005039375376f, | 338 | 4.77M | -0.2929100136981264f, | 339 | 4.77M | 0.0000000000000000, | 340 | 4.77M | 0.0000000000000000, | 341 | 4.77M | -0.0643507165794627f, | 342 | 4.77M | 0.3935103426921022f, | 343 | 4.77M | 0.0657870154914254f, | 344 | 4.77M | 0.0000000000000000, | 345 | 4.77M | 0.4082482904638634f, | 346 | 4.77M | 0.3078822139579031f, | 347 | 4.77M | 0.3852501370925211f, | 348 | 4.77M | -0.0857401903551927f, | 349 | 4.77M | -0.4613374887461554f, | 350 | 4.77M | 0.0000000000000000, | 351 | 4.77M | 0.2191868483885728f, | 352 | 4.77M | }, | 353 | 4.77M | { | 354 | 4.77M | 0.2500000000000000, | 355 | 4.77M | -0.1014005039375376f, | 356 | 4.77M | -0.1137907446044814f, | 357 | 4.77M | -0.1464291867126654f, | 358 | 4.77M | 0.0000000000000000, | 359 | 4.77M | -0.0643507165794627f, | 360 | 4.77M | 0.0829816309488214f, | 361 | 4.77M | 0.2388977352334547f, | 362 | 4.77M | -0.3531238544981624f, | 363 | 4.77M | 0.4082482904638630f, | 364 | 4.77M | -0.4826689115059858f, | 365 | 4.77M | -0.1741941265991621f, | 366 | 4.77M | -0.0476868035022928f, | 367 | 4.77M | 0.1253805944856431f, | 368 | 4.77M | -0.4326608024727457f, | 369 | 4.77M | -0.2546827712406641f, | 370 | 4.77M | }, | 371 | 4.77M | { | 372 | 4.77M | 
0.2500000000000000, | 373 | 4.77M | -0.1014005039375374f, | 374 | 4.77M | 0.0000000000000000, | 375 | 4.77M | 0.4251149611657548f, | 376 | 4.77M | 0.0000000000000000, | 377 | 4.77M | -0.0643507165794626f, | 378 | 4.77M | -0.4517556589999480f, | 379 | 4.77M | 0.0000000000000000, | 380 | 4.77M | -0.6035859033230976f, | 381 | 4.77M | 0.0000000000000000, | 382 | 4.77M | 0.0000000000000000, | 383 | 4.77M | 0.0000000000000000, | 384 | 4.77M | -0.1426608480880724f, | 385 | 4.77M | -0.1381354035075845f, | 386 | 4.77M | 0.3487520519930227f, | 387 | 4.77M | 0.1135498731499429f, | 388 | 4.77M | }, | 389 | 4.77M | }; | 390 | | | 391 | 4.77M | const HWY_CAPPED(float, 16) d; | 392 | 14.3M | for (size_t i = 0; i < 16; i += Lanes(d)) { | 393 | 9.55M | auto scalar = Zero(d); | 394 | 162M | for (size_t j = 0; j < 16; j++) { | 395 | 152M | auto px = Set(d, pixels[j]); | 396 | 152M | auto basis = Load(d, k4x4AFVBasisTranspose[j] + i); | 397 | 152M | scalar = MulAdd(px, basis, scalar); | 398 | 152M | } | 399 | 9.55M | Store(scalar, d, coeffs + i); | 400 | 9.55M | } | 401 | 4.77M | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*) |
402 | | |
403 | | // Coefficient layout: |
404 | | // - (even, even) positions hold AFV coefficients |
405 | | // - (odd, even) positions hold DCT4x4 coefficients |
406 | | // - (any, odd) positions hold DCT4x8 coefficients |
407 | | template <size_t afv_kind> |
408 | | void AFVTransformFromPixels(const float* JXL_RESTRICT pixels, |
409 | | size_t pixels_stride, |
410 | 4.92M | float* JXL_RESTRICT coefficients) { |
411 | 4.92M | HWY_ALIGN float scratch_space[4 * 8 * 5]; |
412 | 4.92M | size_t afv_x = afv_kind & 1; |
413 | 4.92M | size_t afv_y = afv_kind / 2; |
414 | 4.92M | HWY_ALIGN float block[4 * 8] = {}; |
415 | 24.6M | for (size_t iy = 0; iy < 4; iy++) { |
416 | 98.4M | for (size_t ix = 0; ix < 4; ix++) { |
417 | 78.7M | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] = |
418 | 78.7M | pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x]; |
419 | 78.7M | } |
420 | 19.6M | } |
421 | | // AFV coefficients in (even, even) positions. |
422 | 4.92M | HWY_ALIGN float coeff[4 * 4]; |
423 | 4.92M | AFVDCT4x4(block, coeff); |
424 | 24.6M | for (size_t iy = 0; iy < 4; iy++) { |
425 | 98.4M | for (size_t ix = 0; ix < 4; ix++) { |
426 | 78.7M | coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix]; |
427 | 78.7M | } |
428 | 19.6M | } |
429 | | // 4x4 DCT of the block with same y and different x. |
430 | 4.92M | ComputeScaledDCT<4, 4>()( |
431 | 4.92M | DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), |
432 | 4.92M | pixels_stride), |
433 | 4.92M | block, scratch_space); |
434 | | // ... in (odd, even) positions. |
435 | 24.6M | for (size_t iy = 0; iy < 4; iy++) { |
436 | 177M | for (size_t ix = 0; ix < 8; ix++) { |
437 | 157M | coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix]; |
438 | 157M | } |
439 | 19.6M | } |
440 | | // 4x8 DCT of the other half of the block. |
441 | 4.92M | ComputeScaledDCT<4, 8>()( |
442 | 4.92M | DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride), |
443 | 4.92M | block, scratch_space); |
444 | 24.6M | for (size_t iy = 0; iy < 4; iy++) { |
445 | 177M | for (size_t ix = 0; ix < 8; ix++) { |
446 | 157M | coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix]; |
447 | 157M | } |
448 | 19.6M | } |
449 | 4.92M | float block00 = coefficients[0] * 0.25f; |
450 | 4.92M | float block01 = coefficients[1]; |
451 | 4.92M | float block10 = coefficients[8]; |
452 | 4.92M | coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f; |
453 | 4.92M | coefficients[1] = (block00 - block01) * 0.5f; |
454 | 4.92M | coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f; |
455 | 4.92M | } Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void 
jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*) enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*) Line | Count | Source | 410 | 20.0k | float* JXL_RESTRICT coefficients) { | 411 | 20.0k | HWY_ALIGN float scratch_space[4 * 8 * 5]; | 412 | 20.0k | size_t afv_x = afv_kind & 1; | 413 | 20.0k | size_t afv_y = afv_kind / 2; | 414 | 20.0k | HWY_ALIGN float block[4 * 8] = {}; | 415 | 100k | for (size_t iy = 0; iy < 4; iy++) { | 416 | 400k | for (size_t ix = 0; ix < 4; ix++) { | 417 | 320k | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] = | 418 | 320k | pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x]; | 419 | 320k | } | 420 | 80.0k | } | 421 | | // AFV coefficients in (even, even) positions. | 422 | 20.0k | HWY_ALIGN float coeff[4 * 4]; | 423 | 20.0k | AFVDCT4x4(block, coeff); | 424 | 100k | for (size_t iy = 0; iy < 4; iy++) { | 425 | 400k | for (size_t ix = 0; ix < 4; ix++) { | 426 | 320k | coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix]; | 427 | 320k | } | 428 | 80.0k | } | 429 | | // 4x4 DCT of the block with same y and different x. | 430 | 20.0k | ComputeScaledDCT<4, 4>()( | 431 | 20.0k | DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), | 432 | 20.0k | pixels_stride), | 433 | 20.0k | block, scratch_space); | 434 | | // ... in (odd, even) positions. 
| 435 | 100k | for (size_t iy = 0; iy < 4; iy++) { | 436 | 720k | for (size_t ix = 0; ix < 8; ix++) { | 437 | 640k | coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix]; | 438 | 640k | } | 439 | 80.0k | } | 440 | | // 4x8 DCT of the other half of the block. | 441 | 20.0k | ComputeScaledDCT<4, 8>()( | 442 | 20.0k | DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride), | 443 | 20.0k | block, scratch_space); | 444 | 100k | for (size_t iy = 0; iy < 4; iy++) { | 445 | 720k | for (size_t ix = 0; ix < 8; ix++) { | 446 | 640k | coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 447 | 640k | } | 448 | 80.0k | } | 449 | 20.0k | float block00 = coefficients[0] * 0.25f; | 450 | 20.0k | float block01 = coefficients[1]; | 451 | 20.0k | float block10 = coefficients[8]; | 452 | 20.0k | coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f; | 453 | 20.0k | coefficients[1] = (block00 - block01) * 0.5f; | 454 | 20.0k | coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f; | 455 | 20.0k | } |
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*) Line | Count | Source | 410 | 14.4k | float* JXL_RESTRICT coefficients) { | 411 | 14.4k | HWY_ALIGN float scratch_space[4 * 8 * 5]; | 412 | 14.4k | size_t afv_x = afv_kind & 1; | 413 | 14.4k | size_t afv_y = afv_kind / 2; | 414 | 14.4k | HWY_ALIGN float block[4 * 8] = {}; | 415 | 72.1k | for (size_t iy = 0; iy < 4; iy++) { | 416 | 288k | for (size_t ix = 0; ix < 4; ix++) { | 417 | 230k | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] = | 418 | 230k | pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x]; | 419 | 230k | } | 420 | 57.6k | } | 421 | | // AFV coefficients in (even, even) positions. | 422 | 14.4k | HWY_ALIGN float coeff[4 * 4]; | 423 | 14.4k | AFVDCT4x4(block, coeff); | 424 | 72.1k | for (size_t iy = 0; iy < 4; iy++) { | 425 | 288k | for (size_t ix = 0; ix < 4; ix++) { | 426 | 230k | coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix]; | 427 | 230k | } | 428 | 57.6k | } | 429 | | // 4x4 DCT of the block with same y and different x. | 430 | 14.4k | ComputeScaledDCT<4, 4>()( | 431 | 14.4k | DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), | 432 | 14.4k | pixels_stride), | 433 | 14.4k | block, scratch_space); | 434 | | // ... in (odd, even) positions. | 435 | 72.1k | for (size_t iy = 0; iy < 4; iy++) { | 436 | 519k | for (size_t ix = 0; ix < 8; ix++) { | 437 | 461k | coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix]; | 438 | 461k | } | 439 | 57.6k | } | 440 | | // 4x8 DCT of the other half of the block. | 441 | 14.4k | ComputeScaledDCT<4, 8>()( | 442 | 14.4k | DCTFrom(pixels + (afv_y == 1 ? 
0 : 4) * pixels_stride, pixels_stride), | 443 | 14.4k | block, scratch_space); | 444 | 72.1k | for (size_t iy = 0; iy < 4; iy++) { | 445 | 519k | for (size_t ix = 0; ix < 8; ix++) { | 446 | 461k | coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 447 | 461k | } | 448 | 57.6k | } | 449 | 14.4k | float block00 = coefficients[0] * 0.25f; | 450 | 14.4k | float block01 = coefficients[1]; | 451 | 14.4k | float block10 = coefficients[8]; | 452 | 14.4k | coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f; | 453 | 14.4k | coefficients[1] = (block00 - block01) * 0.5f; | 454 | 14.4k | coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f; | 455 | 14.4k | } |
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*) Line | Count | Source | 410 | 15.6k | float* JXL_RESTRICT coefficients) { | 411 | 15.6k | HWY_ALIGN float scratch_space[4 * 8 * 5]; | 412 | 15.6k | size_t afv_x = afv_kind & 1; | 413 | 15.6k | size_t afv_y = afv_kind / 2; | 414 | 15.6k | HWY_ALIGN float block[4 * 8] = {}; | 415 | 78.3k | for (size_t iy = 0; iy < 4; iy++) { | 416 | 313k | for (size_t ix = 0; ix < 4; ix++) { | 417 | 250k | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] = | 418 | 250k | pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x]; | 419 | 250k | } | 420 | 62.6k | } | 421 | | // AFV coefficients in (even, even) positions. | 422 | 15.6k | HWY_ALIGN float coeff[4 * 4]; | 423 | 15.6k | AFVDCT4x4(block, coeff); | 424 | 78.3k | for (size_t iy = 0; iy < 4; iy++) { | 425 | 313k | for (size_t ix = 0; ix < 4; ix++) { | 426 | 250k | coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix]; | 427 | 250k | } | 428 | 62.6k | } | 429 | | // 4x4 DCT of the block with same y and different x. | 430 | 15.6k | ComputeScaledDCT<4, 4>()( | 431 | 15.6k | DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), | 432 | 15.6k | pixels_stride), | 433 | 15.6k | block, scratch_space); | 434 | | // ... in (odd, even) positions. | 435 | 78.3k | for (size_t iy = 0; iy < 4; iy++) { | 436 | 563k | for (size_t ix = 0; ix < 8; ix++) { | 437 | 501k | coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix]; | 438 | 501k | } | 439 | 62.6k | } | 440 | | // 4x8 DCT of the other half of the block. | 441 | 15.6k | ComputeScaledDCT<4, 8>()( | 442 | 15.6k | DCTFrom(pixels + (afv_y == 1 ? 
0 : 4) * pixels_stride, pixels_stride), | 443 | 15.6k | block, scratch_space); | 444 | 78.3k | for (size_t iy = 0; iy < 4; iy++) { | 445 | 563k | for (size_t ix = 0; ix < 8; ix++) { | 446 | 501k | coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 447 | 501k | } | 448 | 62.6k | } | 449 | 15.6k | float block00 = coefficients[0] * 0.25f; | 450 | 15.6k | float block01 = coefficients[1]; | 451 | 15.6k | float block10 = coefficients[8]; | 452 | 15.6k | coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f; | 453 | 15.6k | coefficients[1] = (block00 - block01) * 0.5f; | 454 | 15.6k | coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f; | 455 | 15.6k | } |
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*) Line | Count | Source | 410 | 23.0k | float* JXL_RESTRICT coefficients) { | 411 | 23.0k | HWY_ALIGN float scratch_space[4 * 8 * 5]; | 412 | 23.0k | size_t afv_x = afv_kind & 1; | 413 | 23.0k | size_t afv_y = afv_kind / 2; | 414 | 23.0k | HWY_ALIGN float block[4 * 8] = {}; | 415 | 115k | for (size_t iy = 0; iy < 4; iy++) { | 416 | 460k | for (size_t ix = 0; ix < 4; ix++) { | 417 | 368k | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] = | 418 | 368k | pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x]; | 419 | 368k | } | 420 | 92.0k | } | 421 | | // AFV coefficients in (even, even) positions. | 422 | 23.0k | HWY_ALIGN float coeff[4 * 4]; | 423 | 23.0k | AFVDCT4x4(block, coeff); | 424 | 115k | for (size_t iy = 0; iy < 4; iy++) { | 425 | 460k | for (size_t ix = 0; ix < 4; ix++) { | 426 | 368k | coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix]; | 427 | 368k | } | 428 | 92.0k | } | 429 | | // 4x4 DCT of the block with same y and different x. | 430 | 23.0k | ComputeScaledDCT<4, 4>()( | 431 | 23.0k | DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), | 432 | 23.0k | pixels_stride), | 433 | 23.0k | block, scratch_space); | 434 | | // ... in (odd, even) positions. | 435 | 115k | for (size_t iy = 0; iy < 4; iy++) { | 436 | 828k | for (size_t ix = 0; ix < 8; ix++) { | 437 | 736k | coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix]; | 438 | 736k | } | 439 | 92.0k | } | 440 | | // 4x8 DCT of the other half of the block. | 441 | 23.0k | ComputeScaledDCT<4, 8>()( | 442 | 23.0k | DCTFrom(pixels + (afv_y == 1 ? 
0 : 4) * pixels_stride, pixels_stride), | 443 | 23.0k | block, scratch_space); | 444 | 115k | for (size_t iy = 0; iy < 4; iy++) { | 445 | 828k | for (size_t ix = 0; ix < 8; ix++) { | 446 | 736k | coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 447 | 736k | } | 448 | 92.0k | } | 449 | 23.0k | float block00 = coefficients[0] * 0.25f; | 450 | 23.0k | float block01 = coefficients[1]; | 451 | 23.0k | float block10 = coefficients[8]; | 452 | 23.0k | coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f; | 453 | 23.0k | coefficients[1] = (block00 - block01) * 0.5f; | 454 | 23.0k | coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f; | 455 | 23.0k | } |
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*) enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*) Line | Count | Source | 410 | 20.0k | float* JXL_RESTRICT coefficients) { | 411 | 20.0k | HWY_ALIGN float scratch_space[4 * 8 * 5]; | 412 | 20.0k | size_t afv_x = afv_kind & 1; | 413 | 20.0k | size_t afv_y = afv_kind / 2; | 414 | 20.0k | HWY_ALIGN float block[4 * 8] = {}; | 415 | 100k | for (size_t iy = 0; iy < 4; iy++) { | 416 | 400k | for (size_t ix = 0; ix < 4; ix++) { | 417 | 320k | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] = | 418 | 320k | pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x]; | 419 | 320k | } | 420 | 80.0k | } | 421 | | // AFV coefficients in (even, even) positions. 
| 422 | 20.0k | HWY_ALIGN float coeff[4 * 4]; | 423 | 20.0k | AFVDCT4x4(block, coeff); | 424 | 100k | for (size_t iy = 0; iy < 4; iy++) { | 425 | 400k | for (size_t ix = 0; ix < 4; ix++) { | 426 | 320k | coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix]; | 427 | 320k | } | 428 | 80.0k | } | 429 | | // 4x4 DCT of the block with same y and different x. | 430 | 20.0k | ComputeScaledDCT<4, 4>()( | 431 | 20.0k | DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), | 432 | 20.0k | pixels_stride), | 433 | 20.0k | block, scratch_space); | 434 | | // ... in (odd, even) positions. | 435 | 100k | for (size_t iy = 0; iy < 4; iy++) { | 436 | 720k | for (size_t ix = 0; ix < 8; ix++) { | 437 | 640k | coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix]; | 438 | 640k | } | 439 | 80.0k | } | 440 | | // 4x8 DCT of the other half of the block. | 441 | 20.0k | ComputeScaledDCT<4, 8>()( | 442 | 20.0k | DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride), | 443 | 20.0k | block, scratch_space); | 444 | 100k | for (size_t iy = 0; iy < 4; iy++) { | 445 | 720k | for (size_t ix = 0; ix < 8; ix++) { | 446 | 640k | coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 447 | 640k | } | 448 | 80.0k | } | 449 | 20.0k | float block00 = coefficients[0] * 0.25f; | 450 | 20.0k | float block01 = coefficients[1]; | 451 | 20.0k | float block10 = coefficients[8]; | 452 | 20.0k | coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f; | 453 | 20.0k | coefficients[1] = (block00 - block01) * 0.5f; | 454 | 20.0k | coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f; | 455 | 20.0k | } |
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*) Line | Count | Source | 410 | 14.4k | float* JXL_RESTRICT coefficients) { | 411 | 14.4k | HWY_ALIGN float scratch_space[4 * 8 * 5]; | 412 | 14.4k | size_t afv_x = afv_kind & 1; | 413 | 14.4k | size_t afv_y = afv_kind / 2; | 414 | 14.4k | HWY_ALIGN float block[4 * 8] = {}; | 415 | 72.1k | for (size_t iy = 0; iy < 4; iy++) { | 416 | 288k | for (size_t ix = 0; ix < 4; ix++) { | 417 | 230k | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] = | 418 | 230k | pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x]; | 419 | 230k | } | 420 | 57.6k | } | 421 | | // AFV coefficients in (even, even) positions. | 422 | 14.4k | HWY_ALIGN float coeff[4 * 4]; | 423 | 14.4k | AFVDCT4x4(block, coeff); | 424 | 72.1k | for (size_t iy = 0; iy < 4; iy++) { | 425 | 288k | for (size_t ix = 0; ix < 4; ix++) { | 426 | 230k | coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix]; | 427 | 230k | } | 428 | 57.6k | } | 429 | | // 4x4 DCT of the block with same y and different x. | 430 | 14.4k | ComputeScaledDCT<4, 4>()( | 431 | 14.4k | DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), | 432 | 14.4k | pixels_stride), | 433 | 14.4k | block, scratch_space); | 434 | | // ... in (odd, even) positions. | 435 | 72.1k | for (size_t iy = 0; iy < 4; iy++) { | 436 | 519k | for (size_t ix = 0; ix < 8; ix++) { | 437 | 461k | coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix]; | 438 | 461k | } | 439 | 57.6k | } | 440 | | // 4x8 DCT of the other half of the block. | 441 | 14.4k | ComputeScaledDCT<4, 8>()( | 442 | 14.4k | DCTFrom(pixels + (afv_y == 1 ? 
0 : 4) * pixels_stride, pixels_stride), | 443 | 14.4k | block, scratch_space); | 444 | 72.1k | for (size_t iy = 0; iy < 4; iy++) { | 445 | 519k | for (size_t ix = 0; ix < 8; ix++) { | 446 | 461k | coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 447 | 461k | } | 448 | 57.6k | } | 449 | 14.4k | float block00 = coefficients[0] * 0.25f; | 450 | 14.4k | float block01 = coefficients[1]; | 451 | 14.4k | float block10 = coefficients[8]; | 452 | 14.4k | coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f; | 453 | 14.4k | coefficients[1] = (block00 - block01) * 0.5f; | 454 | 14.4k | coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f; | 455 | 14.4k | } |
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*) Line | Count | Source | 410 | 15.6k | float* JXL_RESTRICT coefficients) { | 411 | 15.6k | HWY_ALIGN float scratch_space[4 * 8 * 5]; | 412 | 15.6k | size_t afv_x = afv_kind & 1; | 413 | 15.6k | size_t afv_y = afv_kind / 2; | 414 | 15.6k | HWY_ALIGN float block[4 * 8] = {}; | 415 | 78.3k | for (size_t iy = 0; iy < 4; iy++) { | 416 | 313k | for (size_t ix = 0; ix < 4; ix++) { | 417 | 250k | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] = | 418 | 250k | pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x]; | 419 | 250k | } | 420 | 62.6k | } | 421 | | // AFV coefficients in (even, even) positions. | 422 | 15.6k | HWY_ALIGN float coeff[4 * 4]; | 423 | 15.6k | AFVDCT4x4(block, coeff); | 424 | 78.3k | for (size_t iy = 0; iy < 4; iy++) { | 425 | 313k | for (size_t ix = 0; ix < 4; ix++) { | 426 | 250k | coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix]; | 427 | 250k | } | 428 | 62.6k | } | 429 | | // 4x4 DCT of the block with same y and different x. | 430 | 15.6k | ComputeScaledDCT<4, 4>()( | 431 | 15.6k | DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), | 432 | 15.6k | pixels_stride), | 433 | 15.6k | block, scratch_space); | 434 | | // ... in (odd, even) positions. | 435 | 78.3k | for (size_t iy = 0; iy < 4; iy++) { | 436 | 563k | for (size_t ix = 0; ix < 8; ix++) { | 437 | 501k | coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix]; | 438 | 501k | } | 439 | 62.6k | } | 440 | | // 4x8 DCT of the other half of the block. | 441 | 15.6k | ComputeScaledDCT<4, 8>()( | 442 | 15.6k | DCTFrom(pixels + (afv_y == 1 ? 
0 : 4) * pixels_stride, pixels_stride), | 443 | 15.6k | block, scratch_space); | 444 | 78.3k | for (size_t iy = 0; iy < 4; iy++) { | 445 | 563k | for (size_t ix = 0; ix < 8; ix++) { | 446 | 501k | coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 447 | 501k | } | 448 | 62.6k | } | 449 | 15.6k | float block00 = coefficients[0] * 0.25f; | 450 | 15.6k | float block01 = coefficients[1]; | 451 | 15.6k | float block10 = coefficients[8]; | 452 | 15.6k | coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f; | 453 | 15.6k | coefficients[1] = (block00 - block01) * 0.5f; | 454 | 15.6k | coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f; | 455 | 15.6k | } |
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*) Line | Count | Source | 410 | 23.0k | float* JXL_RESTRICT coefficients) { | 411 | 23.0k | HWY_ALIGN float scratch_space[4 * 8 * 5]; | 412 | 23.0k | size_t afv_x = afv_kind & 1; | 413 | 23.0k | size_t afv_y = afv_kind / 2; | 414 | 23.0k | HWY_ALIGN float block[4 * 8] = {}; | 415 | 115k | for (size_t iy = 0; iy < 4; iy++) { | 416 | 460k | for (size_t ix = 0; ix < 4; ix++) { | 417 | 368k | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] = | 418 | 368k | pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x]; | 419 | 368k | } | 420 | 92.0k | } | 421 | | // AFV coefficients in (even, even) positions. | 422 | 23.0k | HWY_ALIGN float coeff[4 * 4]; | 423 | 23.0k | AFVDCT4x4(block, coeff); | 424 | 115k | for (size_t iy = 0; iy < 4; iy++) { | 425 | 460k | for (size_t ix = 0; ix < 4; ix++) { | 426 | 368k | coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix]; | 427 | 368k | } | 428 | 92.0k | } | 429 | | // 4x4 DCT of the block with same y and different x. | 430 | 23.0k | ComputeScaledDCT<4, 4>()( | 431 | 23.0k | DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), | 432 | 23.0k | pixels_stride), | 433 | 23.0k | block, scratch_space); | 434 | | // ... in (odd, even) positions. | 435 | 115k | for (size_t iy = 0; iy < 4; iy++) { | 436 | 828k | for (size_t ix = 0; ix < 8; ix++) { | 437 | 736k | coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix]; | 438 | 736k | } | 439 | 92.0k | } | 440 | | // 4x8 DCT of the other half of the block. | 441 | 23.0k | ComputeScaledDCT<4, 8>()( | 442 | 23.0k | DCTFrom(pixels + (afv_y == 1 ? 
0 : 4) * pixels_stride, pixels_stride), | 443 | 23.0k | block, scratch_space); | 444 | 115k | for (size_t iy = 0; iy < 4; iy++) { | 445 | 828k | for (size_t ix = 0; ix < 8; ix++) { | 446 | 736k | coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 447 | 736k | } | 448 | 92.0k | } | 449 | 23.0k | float block00 = coefficients[0] * 0.25f; | 450 | 23.0k | float block01 = coefficients[1]; | 451 | 23.0k | float block10 = coefficients[8]; | 452 | 23.0k | coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f; | 453 | 23.0k | coefficients[1] = (block00 - block01) * 0.5f; | 454 | 23.0k | coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f; | 455 | 23.0k | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*) enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*) Line | Count | Source | 410 | 1.19M | float* JXL_RESTRICT coefficients) { | 411 | 1.19M | HWY_ALIGN float scratch_space[4 * 8 * 5]; | 412 | 1.19M | size_t afv_x = afv_kind & 1; | 413 | 1.19M | size_t afv_y = afv_kind / 2; | 414 | 1.19M | HWY_ALIGN float block[4 * 8] = {}; | 415 | 5.97M | for (size_t iy = 0; iy < 4; iy++) { | 416 | 23.8M | for (size_t ix = 0; ix < 4; ix++) { | 417 | 19.1M | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] = | 418 | 19.1M | pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x]; | 419 | 19.1M | } | 420 | 4.77M | } | 421 | | // AFV coefficients in (even, even) positions. 
| 422 | 1.19M | HWY_ALIGN float coeff[4 * 4]; | 423 | 1.19M | AFVDCT4x4(block, coeff); | 424 | 5.97M | for (size_t iy = 0; iy < 4; iy++) { | 425 | 23.8M | for (size_t ix = 0; ix < 4; ix++) { | 426 | 19.1M | coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix]; | 427 | 19.1M | } | 428 | 4.77M | } | 429 | | // 4x4 DCT of the block with same y and different x. | 430 | 1.19M | ComputeScaledDCT<4, 4>()( | 431 | 1.19M | DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), | 432 | 1.19M | pixels_stride), | 433 | 1.19M | block, scratch_space); | 434 | | // ... in (odd, even) positions. | 435 | 5.97M | for (size_t iy = 0; iy < 4; iy++) { | 436 | 42.9M | for (size_t ix = 0; ix < 8; ix++) { | 437 | 38.2M | coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix]; | 438 | 38.2M | } | 439 | 4.77M | } | 440 | | // 4x8 DCT of the other half of the block. | 441 | 1.19M | ComputeScaledDCT<4, 8>()( | 442 | 1.19M | DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride), | 443 | 1.19M | block, scratch_space); | 444 | 5.97M | for (size_t iy = 0; iy < 4; iy++) { | 445 | 42.9M | for (size_t ix = 0; ix < 8; ix++) { | 446 | 38.2M | coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 447 | 38.2M | } | 448 | 4.77M | } | 449 | 1.19M | float block00 = coefficients[0] * 0.25f; | 450 | 1.19M | float block01 = coefficients[1]; | 451 | 1.19M | float block10 = coefficients[8]; | 452 | 1.19M | coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f; | 453 | 1.19M | coefficients[1] = (block00 - block01) * 0.5f; | 454 | 1.19M | coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f; | 455 | 1.19M | } |
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*) Line | Count | Source | 410 | 1.19M | float* JXL_RESTRICT coefficients) { | 411 | 1.19M | HWY_ALIGN float scratch_space[4 * 8 * 5]; | 412 | 1.19M | size_t afv_x = afv_kind & 1; | 413 | 1.19M | size_t afv_y = afv_kind / 2; | 414 | 1.19M | HWY_ALIGN float block[4 * 8] = {}; | 415 | 5.97M | for (size_t iy = 0; iy < 4; iy++) { | 416 | 23.8M | for (size_t ix = 0; ix < 4; ix++) { | 417 | 19.1M | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] = | 418 | 19.1M | pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x]; | 419 | 19.1M | } | 420 | 4.77M | } | 421 | | // AFV coefficients in (even, even) positions. | 422 | 1.19M | HWY_ALIGN float coeff[4 * 4]; | 423 | 1.19M | AFVDCT4x4(block, coeff); | 424 | 5.97M | for (size_t iy = 0; iy < 4; iy++) { | 425 | 23.8M | for (size_t ix = 0; ix < 4; ix++) { | 426 | 19.1M | coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix]; | 427 | 19.1M | } | 428 | 4.77M | } | 429 | | // 4x4 DCT of the block with same y and different x. | 430 | 1.19M | ComputeScaledDCT<4, 4>()( | 431 | 1.19M | DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), | 432 | 1.19M | pixels_stride), | 433 | 1.19M | block, scratch_space); | 434 | | // ... in (odd, even) positions. | 435 | 5.97M | for (size_t iy = 0; iy < 4; iy++) { | 436 | 42.9M | for (size_t ix = 0; ix < 8; ix++) { | 437 | 38.2M | coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix]; | 438 | 38.2M | } | 439 | 4.77M | } | 440 | | // 4x8 DCT of the other half of the block. | 441 | 1.19M | ComputeScaledDCT<4, 8>()( | 442 | 1.19M | DCTFrom(pixels + (afv_y == 1 ? 
0 : 4) * pixels_stride, pixels_stride), | 443 | 1.19M | block, scratch_space); | 444 | 5.97M | for (size_t iy = 0; iy < 4; iy++) { | 445 | 42.9M | for (size_t ix = 0; ix < 8; ix++) { | 446 | 38.2M | coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 447 | 38.2M | } | 448 | 4.77M | } | 449 | 1.19M | float block00 = coefficients[0] * 0.25f; | 450 | 1.19M | float block01 = coefficients[1]; | 451 | 1.19M | float block10 = coefficients[8]; | 452 | 1.19M | coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f; | 453 | 1.19M | coefficients[1] = (block00 - block01) * 0.5f; | 454 | 1.19M | coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f; | 455 | 1.19M | } |
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*) Line | Count | Source | 410 | 1.19M | float* JXL_RESTRICT coefficients) { | 411 | 1.19M | HWY_ALIGN float scratch_space[4 * 8 * 5]; | 412 | 1.19M | size_t afv_x = afv_kind & 1; | 413 | 1.19M | size_t afv_y = afv_kind / 2; | 414 | 1.19M | HWY_ALIGN float block[4 * 8] = {}; | 415 | 5.97M | for (size_t iy = 0; iy < 4; iy++) { | 416 | 23.8M | for (size_t ix = 0; ix < 4; ix++) { | 417 | 19.1M | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] = | 418 | 19.1M | pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x]; | 419 | 19.1M | } | 420 | 4.77M | } | 421 | | // AFV coefficients in (even, even) positions. | 422 | 1.19M | HWY_ALIGN float coeff[4 * 4]; | 423 | 1.19M | AFVDCT4x4(block, coeff); | 424 | 5.97M | for (size_t iy = 0; iy < 4; iy++) { | 425 | 23.8M | for (size_t ix = 0; ix < 4; ix++) { | 426 | 19.1M | coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix]; | 427 | 19.1M | } | 428 | 4.77M | } | 429 | | // 4x4 DCT of the block with same y and different x. | 430 | 1.19M | ComputeScaledDCT<4, 4>()( | 431 | 1.19M | DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), | 432 | 1.19M | pixels_stride), | 433 | 1.19M | block, scratch_space); | 434 | | // ... in (odd, even) positions. | 435 | 5.97M | for (size_t iy = 0; iy < 4; iy++) { | 436 | 42.9M | for (size_t ix = 0; ix < 8; ix++) { | 437 | 38.2M | coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix]; | 438 | 38.2M | } | 439 | 4.77M | } | 440 | | // 4x8 DCT of the other half of the block. | 441 | 1.19M | ComputeScaledDCT<4, 8>()( | 442 | 1.19M | DCTFrom(pixels + (afv_y == 1 ? 
0 : 4) * pixels_stride, pixels_stride), | 443 | 1.19M | block, scratch_space); | 444 | 5.97M | for (size_t iy = 0; iy < 4; iy++) { | 445 | 42.9M | for (size_t ix = 0; ix < 8; ix++) { | 446 | 38.2M | coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 447 | 38.2M | } | 448 | 4.77M | } | 449 | 1.19M | float block00 = coefficients[0] * 0.25f; | 450 | 1.19M | float block01 = coefficients[1]; | 451 | 1.19M | float block10 = coefficients[8]; | 452 | 1.19M | coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f; | 453 | 1.19M | coefficients[1] = (block00 - block01) * 0.5f; | 454 | 1.19M | coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f; | 455 | 1.19M | } |
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*) Line | Count | Source | 410 | 1.19M | float* JXL_RESTRICT coefficients) { | 411 | 1.19M | HWY_ALIGN float scratch_space[4 * 8 * 5]; | 412 | 1.19M | size_t afv_x = afv_kind & 1; | 413 | 1.19M | size_t afv_y = afv_kind / 2; | 414 | 1.19M | HWY_ALIGN float block[4 * 8] = {}; | 415 | 5.97M | for (size_t iy = 0; iy < 4; iy++) { | 416 | 23.8M | for (size_t ix = 0; ix < 4; ix++) { | 417 | 19.1M | block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] = | 418 | 19.1M | pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x]; | 419 | 19.1M | } | 420 | 4.77M | } | 421 | | // AFV coefficients in (even, even) positions. | 422 | 1.19M | HWY_ALIGN float coeff[4 * 4]; | 423 | 1.19M | AFVDCT4x4(block, coeff); | 424 | 5.97M | for (size_t iy = 0; iy < 4; iy++) { | 425 | 23.8M | for (size_t ix = 0; ix < 4; ix++) { | 426 | 19.1M | coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix]; | 427 | 19.1M | } | 428 | 4.77M | } | 429 | | // 4x4 DCT of the block with same y and different x. | 430 | 1.19M | ComputeScaledDCT<4, 4>()( | 431 | 1.19M | DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), | 432 | 1.19M | pixels_stride), | 433 | 1.19M | block, scratch_space); | 434 | | // ... in (odd, even) positions. | 435 | 5.97M | for (size_t iy = 0; iy < 4; iy++) { | 436 | 42.9M | for (size_t ix = 0; ix < 8; ix++) { | 437 | 38.2M | coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix]; | 438 | 38.2M | } | 439 | 4.77M | } | 440 | | // 4x8 DCT of the other half of the block. | 441 | 1.19M | ComputeScaledDCT<4, 8>()( | 442 | 1.19M | DCTFrom(pixels + (afv_y == 1 ? 
0 : 4) * pixels_stride, pixels_stride), | 443 | 1.19M | block, scratch_space); | 444 | 5.97M | for (size_t iy = 0; iy < 4; iy++) { | 445 | 42.9M | for (size_t ix = 0; ix < 8; ix++) { | 446 | 38.2M | coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 447 | 38.2M | } | 448 | 4.77M | } | 449 | 1.19M | float block00 = coefficients[0] * 0.25f; | 450 | 1.19M | float block01 = coefficients[1]; | 451 | 1.19M | float block10 = coefficients[8]; | 452 | 1.19M | coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f; | 453 | 1.19M | coefficients[1] = (block00 - block01) * 0.5f; | 454 | 1.19M | coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f; | 455 | 1.19M | } |
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*) |
456 | | |
// NOTE: the leading `line | count |` columns on each row are coverage-report
// annotations (source line number and hit count), not part of the C++ code.
//
// TransformFromPixels: forward-transform dispatcher. Switches on `strategy`
// and writes the transform of the pixel region at `pixels` (rows are
// `pixels_stride` floats apart) into `coefficients`.
//   strategy       - which AcStrategyType transform to apply.
//   pixels         - input samples, read only.
//   pixels_stride  - distance in floats between consecutive input rows.
//   coefficients   - output buffer; the sub-8x8 cases index it with a row
//                    stride of 8.
//   scratch_space  - forwarded to ComputeScaledDCT; the IDENTITY, DCT2X2 and
//                    AFV0..AFV3 cases below do not touch it.
// The switch has no `default` label; every listed case ends in `break`.
457 | | HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy, |
458 | | const float* JXL_RESTRICT pixels, |
459 | | size_t pixels_stride, |
460 | | float* JXL_RESTRICT coefficients, |
461 | 17.8M | float* JXL_RESTRICT scratch_space) { |
462 | 17.8M | using Type = AcStrategyType; |
463 | 17.8M | switch (strategy) { |
// IDENTITY: (y, x) walks the four 4x4 quadrants of the 8x8 input. For each
// quadrant, block_dc is the mean of its 16 pixels, and every pixel except the
// one at (iy, ix) == (1, 1) is stored as its difference from that (1, 1)
// pixel. Quadrant outputs are interleaved: element (iy, ix) of quadrant
// (y, x) lands at row y + 2 * iy, column x + 2 * ix.
464 | 1.37M | case Type::IDENTITY: { |
465 | 4.12M | for (size_t y = 0; y < 2; y++) { |
466 | 8.25M | for (size_t x = 0; x < 2; x++) { |
467 | 5.50M | float block_dc = 0; |
468 | 27.5M | for (size_t iy = 0; iy < 4; iy++) { |
469 | 110M | for (size_t ix = 0; ix < 4; ix++) { |
470 | 88.0M | block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix]; |
471 | 88.0M | } |
472 | 22.0M | } |
473 | 5.50M | block_dc *= 1.0f / 16; |
474 | 27.5M | for (size_t iy = 0; iy < 4; iy++) { |
475 | 110M | for (size_t ix = 0; ix < 4; ix++) { |
476 | 88.0M | if (ix == 1 && iy == 1) continue; |
477 | 82.5M | coefficients[(y + iy * 2) * 8 + x + ix * 2] = |
478 | 82.5M | pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] - |
479 | 82.5M | pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1]; |
480 | 82.5M | } |
481 | 22.0M | } |
// The (0, 0) difference is moved into the slot skipped above (iy == ix == 1),
// which frees position (y, x) to hold the quadrant mean.
482 | 5.50M | coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x]; |
483 | 5.50M | coefficients[y * 8 + x] = block_dc; |
484 | 5.50M | } |
485 | 2.75M | } |
// Combine the four quadrant means (indices 0, 1, 8, 9) with a 2x2
// sum/difference butterfly scaled by 0.25.
486 | 1.37M | float block00 = coefficients[0]; |
487 | 1.37M | float block01 = coefficients[1]; |
488 | 1.37M | float block10 = coefficients[8]; |
489 | 1.37M | float block11 = coefficients[9]; |
490 | 1.37M | coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f; |
491 | 1.37M | coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f; |
492 | 1.37M | coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f; |
493 | 1.37M | coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f; |
494 | 1.37M | break; |
495 | 0 | } |
// DCT8X4: one ComputeScaledDCT<8, 4> per 4-column half (x selects the half via
// `pixels + x * 4`). The 32 results of half x are written to output rows
// x, x + 2, x + 4, x + 6; afterwards the first values of the two halves
// (indices 0 and 8) are replaced by their half-sum and half-difference.
496 | 1.28M | case Type::DCT8X4: { |
497 | 3.86M | for (size_t x = 0; x < 2; x++) { |
498 | 2.57M | HWY_ALIGN float block[4 * 8]; |
499 | 2.57M | ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block, |
500 | 2.57M | scratch_space); |
501 | 12.8M | for (size_t iy = 0; iy < 4; iy++) { |
502 | 92.8M | for (size_t ix = 0; ix < 8; ix++) { |
503 | | // Store transposed. |
504 | 82.5M | coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix]; |
505 | 82.5M | } |
506 | 10.3M | } |
507 | 2.57M | } |
508 | 1.28M | float block0 = coefficients[0]; |
509 | 1.28M | float block1 = coefficients[8]; |
510 | 1.28M | coefficients[0] = (block0 + block1) * 0.5f; |
511 | 1.28M | coefficients[8] = (block0 - block1) * 0.5f; |
512 | 1.28M | break; |
513 | 0 | } |
// DCT4X8: same layout as DCT8X4, but the input is split into two 4-row halves
// (y selects the half via `pixels + y * 4 * pixels_stride`) and each half goes
// through ComputeScaledDCT<4, 8>.
514 | 1.23M | case Type::DCT4X8: { |
515 | 3.71M | for (size_t y = 0; y < 2; y++) { |
516 | 2.47M | HWY_ALIGN float block[4 * 8]; |
517 | 2.47M | ComputeScaledDCT<4, 8>()( |
518 | 2.47M | DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block, |
519 | 2.47M | scratch_space); |
520 | 12.3M | for (size_t iy = 0; iy < 4; iy++) { |
521 | 89.0M | for (size_t ix = 0; ix < 8; ix++) { |
522 | 79.1M | coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix]; |
523 | 79.1M | } |
524 | 9.89M | } |
525 | 2.47M | } |
526 | 1.23M | float block0 = coefficients[0]; |
527 | 1.23M | float block1 = coefficients[8]; |
528 | 1.23M | coefficients[0] = (block0 + block1) * 0.5f; |
529 | 1.23M | coefficients[8] = (block0 - block1) * 0.5f; |
530 | 1.23M | break; |
531 | 0 | } |
// DCT4X4: one ComputeScaledDCT<4, 4> per 4x4 quadrant. Outputs are interleaved
// exactly as in IDENTITY (row y + 2 * iy, column x + 2 * ix), and the four
// first values (indices 0, 1, 8, 9) go through the same 0.25-scaled 2x2
// butterfly.
532 | 1.19M | case Type::DCT4X4: { |
533 | 3.58M | for (size_t y = 0; y < 2; y++) { |
534 | 7.16M | for (size_t x = 0; x < 2; x++) { |
535 | 4.77M | HWY_ALIGN float block[4 * 4]; |
536 | 4.77M | ComputeScaledDCT<4, 4>()( |
537 | 4.77M | DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride), |
538 | 4.77M | block, scratch_space); |
539 | 23.8M | for (size_t iy = 0; iy < 4; iy++) { |
540 | 95.5M | for (size_t ix = 0; ix < 4; ix++) { |
541 | 76.4M | coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix]; |
542 | 76.4M | } |
543 | 19.1M | } |
544 | 4.77M | } |
545 | 2.38M | } |
546 | 1.19M | float block00 = coefficients[0]; |
547 | 1.19M | float block01 = coefficients[1]; |
548 | 1.19M | float block10 = coefficients[8]; |
549 | 1.19M | float block11 = coefficients[9]; |
550 | 1.19M | coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f; |
551 | 1.19M | coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f; |
552 | 1.19M | coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f; |
553 | 1.19M | coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f; |
554 | 1.19M | break; |
555 | 0 | } |
// DCT2X2: three DCT2TopBlock passes of size 8, 4, 2. The first reads the
// pixels; the next two read and write `coefficients` in place with a row
// stride of kBlockDim.
556 | 1.60M | case Type::DCT2X2: { |
557 | 1.60M | DCT2TopBlock<8>(pixels, pixels_stride, coefficients); |
558 | 1.60M | DCT2TopBlock<4>(coefficients, kBlockDim, coefficients); |
559 | 1.60M | DCT2TopBlock<2>(coefficients, kBlockDim, coefficients); |
560 | 1.60M | break; |
561 | 0 | } |
// All remaining DCT* cases forward to a single ComputeScaledDCT<A, B> whose
// template arguments match the numbers in the case name, writing straight
// into `coefficients`.
562 | 564k | case Type::DCT16X16: { |
563 | 564k | ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients, |
564 | 564k | scratch_space); |
565 | 564k | break; |
566 | 0 | } |
567 | 1.08M | case Type::DCT16X8: { |
568 | 1.08M | ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients, |
569 | 1.08M | scratch_space); |
570 | 1.08M | break; |
571 | 0 | } |
572 | 1.09M | case Type::DCT8X16: { |
573 | 1.09M | ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients, |
574 | 1.09M | scratch_space); |
575 | 1.09M | break; |
576 | 0 | } |
577 | 0 | case Type::DCT32X8: { |
578 | 0 | ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients, |
579 | 0 | scratch_space); |
580 | 0 | break; |
581 | 0 | } |
582 | 0 | case Type::DCT8X32: { |
583 | 0 | ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients, |
584 | 0 | scratch_space); |
585 | 0 | break; |
586 | 0 | } |
587 | 214k | case Type::DCT32X16: { |
588 | 214k | ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients, |
589 | 214k | scratch_space); |
590 | 214k | break; |
591 | 0 | } |
592 | 220k | case Type::DCT16X32: { |
593 | 220k | ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients, |
594 | 220k | scratch_space); |
595 | 220k | break; |
596 | 0 | } |
597 | 130k | case Type::DCT32X32: { |
598 | 130k | ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients, |
599 | 130k | scratch_space); |
600 | 130k | break; |
601 | 0 | } |
// Type::DCT is the plain 8x8 transform.
602 | 2.76M | case Type::DCT: { |
603 | 2.76M | ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients, |
604 | 2.76M | scratch_space); |
605 | 2.76M | break; |
606 | 0 | } |
// AFV0..AFV3 delegate to AFVTransformFromPixels<kind>. `scratch_space` is not
// passed along in these calls.
607 | 1.23M | case Type::AFV0: { |
608 | 1.23M | AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients); |
609 | 1.23M | break; |
610 | 0 | } |
611 | 1.22M | case Type::AFV1: { |
612 | 1.22M | AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients); |
613 | 1.22M | break; |
614 | 0 | } |
615 | 1.22M | case Type::AFV2: { |
616 | 1.22M | AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients); |
617 | 1.22M | break; |
618 | 0 | } |
619 | 1.24M | case Type::AFV3: { |
620 | 1.24M | AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients); |
621 | 1.24M | break; |
622 | 0 | } |
623 | 19.4k | case Type::DCT64X64: { |
624 | 19.4k | ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients, |
625 | 19.4k | scratch_space); |
626 | 19.4k | break; |
627 | 0 | } |
628 | 66.1k | case Type::DCT64X32: { |
629 | 66.1k | ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients, |
630 | 66.1k | scratch_space); |
631 | 66.1k | break; |
632 | 0 | } |
633 | 48.2k | case Type::DCT32X64: { |
634 | 48.2k | ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients, |
635 | 48.2k | scratch_space); |
636 | 48.2k | break; |
637 | 0 | } |
638 | 0 | case Type::DCT128X128: { |
639 | 0 | ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients, |
640 | 0 | scratch_space); |
641 | 0 | break; |
642 | 0 | } |
643 | 0 | case Type::DCT128X64: { |
644 | 0 | ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients, |
645 | 0 | scratch_space); |
646 | 0 | break; |
647 | 0 | } |
648 | 0 | case Type::DCT64X128: { |
649 | 0 | ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients, |
650 | 0 | scratch_space); |
651 | 0 | break; |
652 | 0 | } |
653 | 0 | case Type::DCT256X256: { |
654 | 0 | ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients, |
655 | 0 | scratch_space); |
656 | 0 | break; |
657 | 0 | } |
658 | 0 | case Type::DCT256X128: { |
659 | 0 | ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients, |
660 | 0 | scratch_space); |
661 | 0 | break; |
662 | 0 | } |
663 | 0 | case Type::DCT128X256: { |
664 | 0 | ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients, |
665 | 0 | scratch_space); |
666 | 0 | break; |
667 | 0 | } |
668 | 17.8M | } |
669 | 17.8M | } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*) Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*) enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*) Line | Count | Source | 461 | 725k | float* JXL_RESTRICT scratch_space) { | 462 | 725k | using Type = AcStrategyType; | 463 | 725k | switch (strategy) { | 464 | 90.8k | case Type::IDENTITY: { | 465 | 272k | for (size_t y = 0; y < 2; y++) { | 466 | 544k | for (size_t x = 0; x < 2; x++) { | 467 | 363k | float block_dc = 0; | 468 | 1.81M | for (size_t iy = 0; iy < 4; iy++) { | 469 | 7.26M | for (size_t ix = 0; ix < 4; ix++) { | 470 | 5.81M | block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix]; | 471 | 5.81M | } | 472 | 1.45M | } | 473 | 363k | block_dc *= 1.0f / 16; | 474 | 1.81M | for (size_t iy = 0; iy < 4; iy++) { | 475 | 7.26M | for (size_t ix = 0; ix < 4; ix++) { | 476 | 5.81M | if (ix == 1 && iy == 1) continue; | 477 | 5.44M | coefficients[(y + iy * 2) * 8 + x + ix * 2] = | 478 | 5.44M | pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] - | 479 | 5.44M | pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1]; | 480 | 5.44M | } | 481 | 1.45M | } | 482 | 363k | coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x]; | 483 | 363k | coefficients[y * 8 + x] = block_dc; | 484 | 363k | } | 485 | 181k | } | 486 | 90.8k | float block00 
= coefficients[0]; | 487 | 90.8k | float block01 = coefficients[1]; | 488 | 90.8k | float block10 = coefficients[8]; | 489 | 90.8k | float block11 = coefficients[9]; | 490 | 90.8k | coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f; | 491 | 90.8k | coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f; | 492 | 90.8k | coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f; | 493 | 90.8k | coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f; | 494 | 90.8k | break; | 495 | 0 | } | 496 | 47.6k | case Type::DCT8X4: { | 497 | 143k | for (size_t x = 0; x < 2; x++) { | 498 | 95.3k | HWY_ALIGN float block[4 * 8]; | 499 | 95.3k | ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block, | 500 | 95.3k | scratch_space); | 501 | 476k | for (size_t iy = 0; iy < 4; iy++) { | 502 | 3.43M | for (size_t ix = 0; ix < 8; ix++) { | 503 | | // Store transposed. | 504 | 3.05M | coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 505 | 3.05M | } | 506 | 381k | } | 507 | 95.3k | } | 508 | 47.6k | float block0 = coefficients[0]; | 509 | 47.6k | float block1 = coefficients[8]; | 510 | 47.6k | coefficients[0] = (block0 + block1) * 0.5f; | 511 | 47.6k | coefficients[8] = (block0 - block1) * 0.5f; | 512 | 47.6k | break; | 513 | 0 | } | 514 | 21.4k | case Type::DCT4X8: { | 515 | 64.4k | for (size_t y = 0; y < 2; y++) { | 516 | 42.9k | HWY_ALIGN float block[4 * 8]; | 517 | 42.9k | ComputeScaledDCT<4, 8>()( | 518 | 42.9k | DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block, | 519 | 42.9k | scratch_space); | 520 | 214k | for (size_t iy = 0; iy < 4; iy++) { | 521 | 1.54M | for (size_t ix = 0; ix < 8; ix++) { | 522 | 1.37M | coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 523 | 1.37M | } | 524 | 171k | } | 525 | 42.9k | } | 526 | 21.4k | float block0 = coefficients[0]; | 527 | 21.4k | float block1 = coefficients[8]; | 528 | 21.4k | coefficients[0] = (block0 + block1) * 0.5f; | 529 | 21.4k | 
coefficients[8] = (block0 - block1) * 0.5f; | 530 | 21.4k | break; | 531 | 0 | } | 532 | 267 | case Type::DCT4X4: { | 533 | 801 | for (size_t y = 0; y < 2; y++) { | 534 | 1.60k | for (size_t x = 0; x < 2; x++) { | 535 | 1.06k | HWY_ALIGN float block[4 * 4]; | 536 | 1.06k | ComputeScaledDCT<4, 4>()( | 537 | 1.06k | DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride), | 538 | 1.06k | block, scratch_space); | 539 | 5.34k | for (size_t iy = 0; iy < 4; iy++) { | 540 | 21.3k | for (size_t ix = 0; ix < 4; ix++) { | 541 | 17.0k | coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix]; | 542 | 17.0k | } | 543 | 4.27k | } | 544 | 1.06k | } | 545 | 534 | } | 546 | 267 | float block00 = coefficients[0]; | 547 | 267 | float block01 = coefficients[1]; | 548 | 267 | float block10 = coefficients[8]; | 549 | 267 | float block11 = coefficients[9]; | 550 | 267 | coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f; | 551 | 267 | coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f; | 552 | 267 | coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f; | 553 | 267 | coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f; | 554 | 267 | break; | 555 | 0 | } | 556 | 206k | case Type::DCT2X2: { | 557 | 206k | DCT2TopBlock<8>(pixels, pixels_stride, coefficients); | 558 | 206k | DCT2TopBlock<4>(coefficients, kBlockDim, coefficients); | 559 | 206k | DCT2TopBlock<2>(coefficients, kBlockDim, coefficients); | 560 | 206k | break; | 561 | 0 | } | 562 | 19.1k | case Type::DCT16X16: { | 563 | 19.1k | ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients, | 564 | 19.1k | scratch_space); | 565 | 19.1k | break; | 566 | 0 | } | 567 | 24.7k | case Type::DCT16X8: { | 568 | 24.7k | ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients, | 569 | 24.7k | scratch_space); | 570 | 24.7k | break; | 571 | 0 | } | 572 | 27.7k | case Type::DCT8X16: { | 573 | 27.7k | ComputeScaledDCT<8, 16>()(DCTFrom(pixels, 
pixels_stride), coefficients, | 574 | 27.7k | scratch_space); | 575 | 27.7k | break; | 576 | 0 | } | 577 | 0 | case Type::DCT32X8: { | 578 | 0 | ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients, | 579 | 0 | scratch_space); | 580 | 0 | break; | 581 | 0 | } | 582 | 0 | case Type::DCT8X32: { | 583 | 0 | ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients, | 584 | 0 | scratch_space); | 585 | 0 | break; | 586 | 0 | } | 587 | 5.57k | case Type::DCT32X16: { | 588 | 5.57k | ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients, | 589 | 5.57k | scratch_space); | 590 | 5.57k | break; | 591 | 0 | } | 592 | 6.93k | case Type::DCT16X32: { | 593 | 6.93k | ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients, | 594 | 6.93k | scratch_space); | 595 | 6.93k | break; | 596 | 0 | } | 597 | 12.3k | case Type::DCT32X32: { | 598 | 12.3k | ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients, | 599 | 12.3k | scratch_space); | 600 | 12.3k | break; | 601 | 0 | } | 602 | 187k | case Type::DCT: { | 603 | 187k | ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients, | 604 | 187k | scratch_space); | 605 | 187k | break; | 606 | 0 | } | 607 | 20.0k | case Type::AFV0: { | 608 | 20.0k | AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients); | 609 | 20.0k | break; | 610 | 0 | } | 611 | 14.4k | case Type::AFV1: { | 612 | 14.4k | AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients); | 613 | 14.4k | break; | 614 | 0 | } | 615 | 15.6k | case Type::AFV2: { | 616 | 15.6k | AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients); | 617 | 15.6k | break; | 618 | 0 | } | 619 | 23.0k | case Type::AFV3: { | 620 | 23.0k | AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients); | 621 | 23.0k | break; | 622 | 0 | } | 623 | 1.23k | case Type::DCT64X64: { | 624 | 1.23k | ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients, | 625 | 1.23k | 
scratch_space); | 626 | 1.23k | break; | 627 | 0 | } | 628 | 162 | case Type::DCT64X32: { | 629 | 162 | ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients, | 630 | 162 | scratch_space); | 631 | 162 | break; | 632 | 0 | } | 633 | 123 | case Type::DCT32X64: { | 634 | 123 | ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients, | 635 | 123 | scratch_space); | 636 | 123 | break; | 637 | 0 | } | 638 | 0 | case Type::DCT128X128: { | 639 | 0 | ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients, | 640 | 0 | scratch_space); | 641 | 0 | break; | 642 | 0 | } | 643 | 0 | case Type::DCT128X64: { | 644 | 0 | ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients, | 645 | 0 | scratch_space); | 646 | 0 | break; | 647 | 0 | } | 648 | 0 | case Type::DCT64X128: { | 649 | 0 | ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients, | 650 | 0 | scratch_space); | 651 | 0 | break; | 652 | 0 | } | 653 | 0 | case Type::DCT256X256: { | 654 | 0 | ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients, | 655 | 0 | scratch_space); | 656 | 0 | break; | 657 | 0 | } | 658 | 0 | case Type::DCT256X128: { | 659 | 0 | ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients, | 660 | 0 | scratch_space); | 661 | 0 | break; | 662 | 0 | } | 663 | 0 | case Type::DCT128X256: { | 664 | 0 | ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients, | 665 | 0 | scratch_space); | 666 | 0 | break; | 667 | 0 | } | 668 | 725k | } | 669 | 725k | } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*) enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*) Line | Count | Source | 461 | 1.91M | float* JXL_RESTRICT scratch_space) { | 462 | 1.91M | using Type = AcStrategyType; | 463 | 1.91M | switch (strategy) { | 464 | 90.8k | case Type::IDENTITY: { | 465 | 272k | for (size_t y = 0; y < 2; y++) { | 466 | 544k | for (size_t x = 0; x < 2; x++) { | 467 | 363k | float block_dc = 0; | 468 | 1.81M | for (size_t iy = 0; iy < 4; iy++) { | 469 | 7.26M | for (size_t ix = 0; ix < 4; ix++) { | 470 | 5.81M | block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix]; | 471 | 5.81M | } | 472 | 1.45M | } | 473 | 363k | block_dc *= 1.0f / 16; | 474 | 1.81M | for (size_t iy = 0; iy < 4; iy++) { | 475 | 7.26M | for (size_t ix = 0; ix < 4; ix++) { | 476 | 5.81M | if (ix == 1 && iy == 1) continue; | 477 | 5.44M | coefficients[(y + iy * 2) * 8 + x + ix * 2] = | 478 | 5.44M | pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] - | 479 | 5.44M | pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1]; | 480 | 5.44M | } | 481 | 1.45M | } | 482 | 363k | coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x]; | 483 | 363k | coefficients[y * 8 + x] = block_dc; | 484 | 363k | } | 485 | 181k | } | 486 | 90.8k | float block00 = coefficients[0]; | 487 | 90.8k | float block01 = coefficients[1]; | 488 | 90.8k | float block10 = coefficients[8]; | 489 | 90.8k | float block11 = coefficients[9]; | 490 | 90.8k | coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f; | 491 | 90.8k | coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f; | 492 | 90.8k | 
coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f; | 493 | 90.8k | coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f; | 494 | 90.8k | break; | 495 | 0 | } | 496 | 47.6k | case Type::DCT8X4: { | 497 | 143k | for (size_t x = 0; x < 2; x++) { | 498 | 95.3k | HWY_ALIGN float block[4 * 8]; | 499 | 95.3k | ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block, | 500 | 95.3k | scratch_space); | 501 | 476k | for (size_t iy = 0; iy < 4; iy++) { | 502 | 3.43M | for (size_t ix = 0; ix < 8; ix++) { | 503 | | // Store transposed. | 504 | 3.05M | coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 505 | 3.05M | } | 506 | 381k | } | 507 | 95.3k | } | 508 | 47.6k | float block0 = coefficients[0]; | 509 | 47.6k | float block1 = coefficients[8]; | 510 | 47.6k | coefficients[0] = (block0 + block1) * 0.5f; | 511 | 47.6k | coefficients[8] = (block0 - block1) * 0.5f; | 512 | 47.6k | break; | 513 | 0 | } | 514 | 21.4k | case Type::DCT4X8: { | 515 | 64.4k | for (size_t y = 0; y < 2; y++) { | 516 | 42.9k | HWY_ALIGN float block[4 * 8]; | 517 | 42.9k | ComputeScaledDCT<4, 8>()( | 518 | 42.9k | DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block, | 519 | 42.9k | scratch_space); | 520 | 214k | for (size_t iy = 0; iy < 4; iy++) { | 521 | 1.54M | for (size_t ix = 0; ix < 8; ix++) { | 522 | 1.37M | coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 523 | 1.37M | } | 524 | 171k | } | 525 | 42.9k | } | 526 | 21.4k | float block0 = coefficients[0]; | 527 | 21.4k | float block1 = coefficients[8]; | 528 | 21.4k | coefficients[0] = (block0 + block1) * 0.5f; | 529 | 21.4k | coefficients[8] = (block0 - block1) * 0.5f; | 530 | 21.4k | break; | 531 | 0 | } | 532 | 267 | case Type::DCT4X4: { | 533 | 801 | for (size_t y = 0; y < 2; y++) { | 534 | 1.60k | for (size_t x = 0; x < 2; x++) { | 535 | 1.06k | HWY_ALIGN float block[4 * 4]; | 536 | 1.06k | ComputeScaledDCT<4, 4>()( | 537 | 1.06k | DCTFrom(pixels + y * 4 * pixels_stride + x * 
4, pixels_stride), | 538 | 1.06k | block, scratch_space); | 539 | 5.34k | for (size_t iy = 0; iy < 4; iy++) { | 540 | 21.3k | for (size_t ix = 0; ix < 4; ix++) { | 541 | 17.0k | coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix]; | 542 | 17.0k | } | 543 | 4.27k | } | 544 | 1.06k | } | 545 | 534 | } | 546 | 267 | float block00 = coefficients[0]; | 547 | 267 | float block01 = coefficients[1]; | 548 | 267 | float block10 = coefficients[8]; | 549 | 267 | float block11 = coefficients[9]; | 550 | 267 | coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f; | 551 | 267 | coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f; | 552 | 267 | coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f; | 553 | 267 | coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f; | 554 | 267 | break; | 555 | 0 | } | 556 | 206k | case Type::DCT2X2: { | 557 | 206k | DCT2TopBlock<8>(pixels, pixels_stride, coefficients); | 558 | 206k | DCT2TopBlock<4>(coefficients, kBlockDim, coefficients); | 559 | 206k | DCT2TopBlock<2>(coefficients, kBlockDim, coefficients); | 560 | 206k | break; | 561 | 0 | } | 562 | 19.1k | case Type::DCT16X16: { | 563 | 19.1k | ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients, | 564 | 19.1k | scratch_space); | 565 | 19.1k | break; | 566 | 0 | } | 567 | 24.7k | case Type::DCT16X8: { | 568 | 24.7k | ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients, | 569 | 24.7k | scratch_space); | 570 | 24.7k | break; | 571 | 0 | } | 572 | 27.7k | case Type::DCT8X16: { | 573 | 27.7k | ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients, | 574 | 27.7k | scratch_space); | 575 | 27.7k | break; | 576 | 0 | } | 577 | 0 | case Type::DCT32X8: { | 578 | 0 | ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients, | 579 | 0 | scratch_space); | 580 | 0 | break; | 581 | 0 | } | 582 | 0 | case Type::DCT8X32: { | 583 | 0 | ComputeScaledDCT<8, 32>()(DCTFrom(pixels, 
pixels_stride), coefficients, | 584 | 0 | scratch_space); | 585 | 0 | break; | 586 | 0 | } | 587 | 5.57k | case Type::DCT32X16: { | 588 | 5.57k | ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients, | 589 | 5.57k | scratch_space); | 590 | 5.57k | break; | 591 | 0 | } | 592 | 6.93k | case Type::DCT16X32: { | 593 | 6.93k | ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients, | 594 | 6.93k | scratch_space); | 595 | 6.93k | break; | 596 | 0 | } | 597 | 12.3k | case Type::DCT32X32: { | 598 | 12.3k | ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients, | 599 | 12.3k | scratch_space); | 600 | 12.3k | break; | 601 | 0 | } | 602 | 1.38M | case Type::DCT: { | 603 | 1.38M | ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients, | 604 | 1.38M | scratch_space); | 605 | 1.38M | break; | 606 | 0 | } | 607 | 20.0k | case Type::AFV0: { | 608 | 20.0k | AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients); | 609 | 20.0k | break; | 610 | 0 | } | 611 | 14.4k | case Type::AFV1: { | 612 | 14.4k | AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients); | 613 | 14.4k | break; | 614 | 0 | } | 615 | 15.6k | case Type::AFV2: { | 616 | 15.6k | AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients); | 617 | 15.6k | break; | 618 | 0 | } | 619 | 23.0k | case Type::AFV3: { | 620 | 23.0k | AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients); | 621 | 23.0k | break; | 622 | 0 | } | 623 | 1.23k | case Type::DCT64X64: { | 624 | 1.23k | ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients, | 625 | 1.23k | scratch_space); | 626 | 1.23k | break; | 627 | 0 | } | 628 | 162 | case Type::DCT64X32: { | 629 | 162 | ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients, | 630 | 162 | scratch_space); | 631 | 162 | break; | 632 | 0 | } | 633 | 123 | case Type::DCT32X64: { | 634 | 123 | ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients, | 635 | 
123 | scratch_space); | 636 | 123 | break; | 637 | 0 | } | 638 | 0 | case Type::DCT128X128: { | 639 | 0 | ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients, | 640 | 0 | scratch_space); | 641 | 0 | break; | 642 | 0 | } | 643 | 0 | case Type::DCT128X64: { | 644 | 0 | ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients, | 645 | 0 | scratch_space); | 646 | 0 | break; | 647 | 0 | } | 648 | 0 | case Type::DCT64X128: { | 649 | 0 | ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients, | 650 | 0 | scratch_space); | 651 | 0 | break; | 652 | 0 | } | 653 | 0 | case Type::DCT256X256: { | 654 | 0 | ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients, | 655 | 0 | scratch_space); | 656 | 0 | break; | 657 | 0 | } | 658 | 0 | case Type::DCT256X128: { | 659 | 0 | ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients, | 660 | 0 | scratch_space); | 661 | 0 | break; | 662 | 0 | } | 663 | 0 | case Type::DCT128X256: { | 664 | 0 | ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients, | 665 | 0 | scratch_space); | 666 | 0 | break; | 667 | 0 | } | 668 | 1.91M | } | 669 | 1.91M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*) Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*) enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*) Line | Count | Source | 461 | 15.1M | float* JXL_RESTRICT scratch_space) { | 462 | 15.1M | using Type = AcStrategyType; | 463 | 15.1M | switch (strategy) { | 464 | 1.19M | case Type::IDENTITY: { | 465 | 3.58M | for (size_t y = 0; y < 2; y++) { | 466 | 7.16M | for (size_t x = 0; x < 2; x++) { | 467 | 4.77M | float block_dc = 0; | 468 | 23.8M | for (size_t iy = 0; iy < 4; iy++) { | 469 | 95.5M | for (size_t ix = 0; ix < 4; ix++) { | 470 | 76.4M | block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix]; | 471 | 76.4M | } | 472 | 19.1M | } | 473 | 4.77M | block_dc *= 1.0f / 16; | 474 | 23.8M | for (size_t iy = 0; iy < 4; iy++) { | 475 | 95.5M | for (size_t ix = 0; ix < 4; ix++) { | 476 | 76.4M | if (ix == 1 && iy == 1) continue; | 477 | 71.6M | coefficients[(y + iy * 2) * 8 + x + ix * 2] = | 478 | 71.6M | pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] - | 479 | 71.6M | pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1]; | 480 | 71.6M | } | 481 | 19.1M | } | 482 | 4.77M | coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x]; | 483 | 4.77M | coefficients[y * 8 + x] = block_dc; | 484 | 4.77M | } | 485 | 2.38M | } | 486 | 1.19M | float block00 = coefficients[0]; | 487 | 1.19M | float block01 = coefficients[1]; | 488 | 1.19M | float block10 = coefficients[8]; | 489 | 1.19M | float block11 = coefficients[9]; | 490 | 1.19M | coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f; | 491 | 1.19M | coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f; | 492 | 1.19M | 
coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f; | 493 | 1.19M | coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f; | 494 | 1.19M | break; | 495 | 0 | } | 496 | 1.19M | case Type::DCT8X4: { | 497 | 3.58M | for (size_t x = 0; x < 2; x++) { | 498 | 2.38M | HWY_ALIGN float block[4 * 8]; | 499 | 2.38M | ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block, | 500 | 2.38M | scratch_space); | 501 | 11.9M | for (size_t iy = 0; iy < 4; iy++) { | 502 | 85.9M | for (size_t ix = 0; ix < 8; ix++) { | 503 | | // Store transposed. | 504 | 76.4M | coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 505 | 76.4M | } | 506 | 9.55M | } | 507 | 2.38M | } | 508 | 1.19M | float block0 = coefficients[0]; | 509 | 1.19M | float block1 = coefficients[8]; | 510 | 1.19M | coefficients[0] = (block0 + block1) * 0.5f; | 511 | 1.19M | coefficients[8] = (block0 - block1) * 0.5f; | 512 | 1.19M | break; | 513 | 0 | } | 514 | 1.19M | case Type::DCT4X8: { | 515 | 3.58M | for (size_t y = 0; y < 2; y++) { | 516 | 2.38M | HWY_ALIGN float block[4 * 8]; | 517 | 2.38M | ComputeScaledDCT<4, 8>()( | 518 | 2.38M | DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block, | 519 | 2.38M | scratch_space); | 520 | 11.9M | for (size_t iy = 0; iy < 4; iy++) { | 521 | 85.9M | for (size_t ix = 0; ix < 8; ix++) { | 522 | 76.4M | coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix]; | 523 | 76.4M | } | 524 | 9.55M | } | 525 | 2.38M | } | 526 | 1.19M | float block0 = coefficients[0]; | 527 | 1.19M | float block1 = coefficients[8]; | 528 | 1.19M | coefficients[0] = (block0 + block1) * 0.5f; | 529 | 1.19M | coefficients[8] = (block0 - block1) * 0.5f; | 530 | 1.19M | break; | 531 | 0 | } | 532 | 1.19M | case Type::DCT4X4: { | 533 | 3.58M | for (size_t y = 0; y < 2; y++) { | 534 | 7.16M | for (size_t x = 0; x < 2; x++) { | 535 | 4.77M | HWY_ALIGN float block[4 * 4]; | 536 | 4.77M | ComputeScaledDCT<4, 4>()( | 537 | 4.77M | DCTFrom(pixels + y * 4 * 
pixels_stride + x * 4, pixels_stride), | 538 | 4.77M | block, scratch_space); | 539 | 23.8M | for (size_t iy = 0; iy < 4; iy++) { | 540 | 95.5M | for (size_t ix = 0; ix < 4; ix++) { | 541 | 76.4M | coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix]; | 542 | 76.4M | } | 543 | 19.1M | } | 544 | 4.77M | } | 545 | 2.38M | } | 546 | 1.19M | float block00 = coefficients[0]; | 547 | 1.19M | float block01 = coefficients[1]; | 548 | 1.19M | float block10 = coefficients[8]; | 549 | 1.19M | float block11 = coefficients[9]; | 550 | 1.19M | coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f; | 551 | 1.19M | coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f; | 552 | 1.19M | coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f; | 553 | 1.19M | coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f; | 554 | 1.19M | break; | 555 | 0 | } | 556 | 1.19M | case Type::DCT2X2: { | 557 | 1.19M | DCT2TopBlock<8>(pixels, pixels_stride, coefficients); | 558 | 1.19M | DCT2TopBlock<4>(coefficients, kBlockDim, coefficients); | 559 | 1.19M | DCT2TopBlock<2>(coefficients, kBlockDim, coefficients); | 560 | 1.19M | break; | 561 | 0 | } | 562 | 525k | case Type::DCT16X16: { | 563 | 525k | ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients, | 564 | 525k | scratch_space); | 565 | 525k | break; | 566 | 0 | } | 567 | 1.03M | case Type::DCT16X8: { | 568 | 1.03M | ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients, | 569 | 1.03M | scratch_space); | 570 | 1.03M | break; | 571 | 0 | } | 572 | 1.03M | case Type::DCT8X16: { | 573 | 1.03M | ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients, | 574 | 1.03M | scratch_space); | 575 | 1.03M | break; | 576 | 0 | } | 577 | 0 | case Type::DCT32X8: { | 578 | 0 | ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients, | 579 | 0 | scratch_space); | 580 | 0 | break; | 581 | 0 | } | 582 | 0 | case Type::DCT8X32: { | 583 | 0 | 
ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients, | 584 | 0 | scratch_space); | 585 | 0 | break; | 586 | 0 | } | 587 | 203k | case Type::DCT32X16: { | 588 | 203k | ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients, | 589 | 203k | scratch_space); | 590 | 203k | break; | 591 | 0 | } | 592 | 206k | case Type::DCT16X32: { | 593 | 206k | ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients, | 594 | 206k | scratch_space); | 595 | 206k | break; | 596 | 0 | } | 597 | 106k | case Type::DCT32X32: { | 598 | 106k | ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients, | 599 | 106k | scratch_space); | 600 | 106k | break; | 601 | 0 | } | 602 | 1.19M | case Type::DCT: { | 603 | 1.19M | ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients, | 604 | 1.19M | scratch_space); | 605 | 1.19M | break; | 606 | 0 | } | 607 | 1.19M | case Type::AFV0: { | 608 | 1.19M | AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients); | 609 | 1.19M | break; | 610 | 0 | } | 611 | 1.19M | case Type::AFV1: { | 612 | 1.19M | AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients); | 613 | 1.19M | break; | 614 | 0 | } | 615 | 1.19M | case Type::AFV2: { | 616 | 1.19M | AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients); | 617 | 1.19M | break; | 618 | 0 | } | 619 | 1.19M | case Type::AFV3: { | 620 | 1.19M | AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients); | 621 | 1.19M | break; | 622 | 0 | } | 623 | 17.0k | case Type::DCT64X64: { | 624 | 17.0k | ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients, | 625 | 17.0k | scratch_space); | 626 | 17.0k | break; | 627 | 0 | } | 628 | 65.8k | case Type::DCT64X32: { | 629 | 65.8k | ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients, | 630 | 65.8k | scratch_space); | 631 | 65.8k | break; | 632 | 0 | } | 633 | 48.0k | case Type::DCT32X64: { | 634 | 48.0k | ComputeScaledDCT<32, 
64>()(DCTFrom(pixels, pixels_stride), coefficients, | 635 | 48.0k | scratch_space); | 636 | 48.0k | break; | 637 | 0 | } | 638 | 0 | case Type::DCT128X128: { | 639 | 0 | ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients, | 640 | 0 | scratch_space); | 641 | 0 | break; | 642 | 0 | } | 643 | 0 | case Type::DCT128X64: { | 644 | 0 | ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients, | 645 | 0 | scratch_space); | 646 | 0 | break; | 647 | 0 | } | 648 | 0 | case Type::DCT64X128: { | 649 | 0 | ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients, | 650 | 0 | scratch_space); | 651 | 0 | break; | 652 | 0 | } | 653 | 0 | case Type::DCT256X256: { | 654 | 0 | ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients, | 655 | 0 | scratch_space); | 656 | 0 | break; | 657 | 0 | } | 658 | 0 | case Type::DCT256X128: { | 659 | 0 | ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients, | 660 | 0 | scratch_space); | 661 | 0 | break; | 662 | 0 | } | 663 | 0 | case Type::DCT128X256: { | 664 | 0 | ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients, | 665 | 0 | scratch_space); | 666 | 0 | break; | 667 | 0 | } | 668 | 15.1M | } | 669 | 15.1M | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*) |
670 | | |
671 | | // `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements. |
672 | | HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy, |
673 | | const float* block, float* dc, |
674 | | size_t dc_stride, |
675 | 2.64M | float* scratch_space) { |
676 | 2.64M | using Type = AcStrategyType; |
677 | 2.64M | switch (strategy) { |
678 | 49.4k | case Type::DCT16X8: { |
679 | 49.4k | ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim, |
680 | 49.4k | /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>( |
681 | 49.4k | block, 2 * kBlockDim, dc, dc_stride, scratch_space); |
682 | 49.4k | break; |
683 | 0 | } |
684 | 55.4k | case Type::DCT8X16: { |
685 | 55.4k | ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim, |
686 | 55.4k | /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>( |
687 | 55.4k | block, 2 * kBlockDim, dc, dc_stride, scratch_space); |
688 | 55.4k | break; |
689 | 0 | } |
690 | 38.3k | case Type::DCT16X16: { |
691 | 38.3k | ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim, |
692 | 38.3k | /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>( |
693 | 38.3k | block, 2 * kBlockDim, dc, dc_stride, scratch_space); |
694 | 38.3k | break; |
695 | 0 | } |
696 | 0 | case Type::DCT32X8: { |
697 | 0 | ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim, |
698 | 0 | /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>( |
699 | 0 | block, 4 * kBlockDim, dc, dc_stride, scratch_space); |
700 | 0 | break; |
701 | 0 | } |
702 | 0 | case Type::DCT8X32: { |
703 | 0 | ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim, |
704 | 0 | /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>( |
705 | 0 | block, 4 * kBlockDim, dc, dc_stride, scratch_space); |
706 | 0 | break; |
707 | 0 | } |
708 | 11.1k | case Type::DCT32X16: { |
709 | 11.1k | ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim, |
710 | 11.1k | /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>( |
711 | 11.1k | block, 4 * kBlockDim, dc, dc_stride, scratch_space); |
712 | 11.1k | break; |
713 | 0 | } |
714 | 13.8k | case Type::DCT16X32: { |
715 | 13.8k | ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, |
716 | 13.8k | /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>( |
717 | 13.8k | block, 4 * kBlockDim, dc, dc_stride, scratch_space); |
718 | 13.8k | break; |
719 | 0 | } |
720 | 24.6k | case Type::DCT32X32: { |
721 | 24.6k | ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, |
722 | 24.6k | /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>( |
723 | 24.6k | block, 4 * kBlockDim, dc, dc_stride, scratch_space); |
724 | 24.6k | break; |
725 | 0 | } |
726 | 324 | case Type::DCT64X32: { |
727 | 324 | ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, |
728 | 324 | /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>( |
729 | 324 | block, 8 * kBlockDim, dc, dc_stride, scratch_space); |
730 | 324 | break; |
731 | 0 | } |
732 | 246 | case Type::DCT32X64: { |
733 | 246 | ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, |
734 | 246 | /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>( |
735 | 246 | block, 8 * kBlockDim, dc, dc_stride, scratch_space); |
736 | 246 | break; |
737 | 0 | } |
738 | 2.46k | case Type::DCT64X64: { |
739 | 2.46k | ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, |
740 | 2.46k | /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>( |
741 | 2.46k | block, 8 * kBlockDim, dc, dc_stride, scratch_space); |
742 | 2.46k | break; |
743 | 0 | } |
744 | 0 | case Type::DCT128X64: { |
745 | 0 | ReinterpretingIDCT< |
746 | 0 | /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, |
747 | 0 | /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>( |
748 | 0 | block, 16 * kBlockDim, dc, dc_stride, scratch_space); |
749 | 0 | break; |
750 | 0 | } |
751 | 0 | case Type::DCT64X128: { |
752 | 0 | ReinterpretingIDCT< |
753 | 0 | /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, |
754 | 0 | /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>( |
755 | 0 | block, 16 * kBlockDim, dc, dc_stride, scratch_space); |
756 | 0 | break; |
757 | 0 | } |
758 | 0 | case Type::DCT128X128: { |
759 | 0 | ReinterpretingIDCT< |
760 | 0 | /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, |
761 | 0 | /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>( |
762 | 0 | block, 16 * kBlockDim, dc, dc_stride, scratch_space); |
763 | 0 | break; |
764 | 0 | } |
765 | 0 | case Type::DCT256X128: { |
766 | 0 | ReinterpretingIDCT< |
767 | 0 | /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, |
768 | 0 | /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>( |
769 | 0 | block, 32 * kBlockDim, dc, dc_stride, scratch_space); |
770 | 0 | break; |
771 | 0 | } |
772 | 0 | case Type::DCT128X256: { |
773 | 0 | ReinterpretingIDCT< |
774 | 0 | /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, |
775 | 0 | /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>( |
776 | 0 | block, 32 * kBlockDim, dc, dc_stride, scratch_space); |
777 | 0 | break; |
778 | 0 | } |
779 | 0 | case Type::DCT256X256: { |
780 | 0 | ReinterpretingIDCT< |
781 | 0 | /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, |
782 | 0 | /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>( |
783 | 0 | block, 32 * kBlockDim, dc, dc_stride, scratch_space); |
784 | 0 | break; |
785 | 0 | } |
786 | 1.56M | case Type::DCT: |
787 | 1.98M | case Type::DCT2X2: |
788 | 1.98M | case Type::DCT4X4: |
789 | 2.02M | case Type::DCT4X8: |
790 | 2.12M | case Type::DCT8X4: |
791 | 2.16M | case Type::AFV0: |
792 | 2.18M | case Type::AFV1: |
793 | 2.22M | case Type::AFV2: |
794 | 2.26M | case Type::AFV3: |
795 | 2.44M | case Type::IDENTITY: |
796 | 2.44M | dc[0] = block[0]; |
797 | 2.44M | break; |
798 | 2.64M | } |
799 | 2.64M | } Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*) Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*) enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*) Line | Count | Source | 675 | 725k | float* scratch_space) { | 676 | 725k | using Type = AcStrategyType; | 677 | 725k | switch (strategy) { | 678 | 24.7k | case Type::DCT16X8: { | 679 | 24.7k | ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim, | 680 | 24.7k | /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>( | 681 | 24.7k | block, 2 * kBlockDim, dc, dc_stride, scratch_space); | 682 | 24.7k | break; | 683 | 0 | } | 684 | 27.7k | case Type::DCT8X16: { | 685 | 27.7k | ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim, | 686 | 27.7k | /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>( | 687 | 27.7k | block, 2 * kBlockDim, dc, dc_stride, scratch_space); | 688 | 27.7k | break; | 689 | 0 | } | 690 | 19.1k | case Type::DCT16X16: { | 691 | 19.1k | ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim, | 692 | 19.1k | /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>( | 693 | 19.1k | block, 2 * kBlockDim, dc, dc_stride, scratch_space); | 694 | 19.1k | break; | 695 | 0 | } | 696 | 0 | case Type::DCT32X8: { | 697 | 0 | ReinterpretingIDCT</*DCT_ROWS=*/4 * 
kBlockDim, /*DCT_COLS=*/kBlockDim, | 698 | 0 | /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>( | 699 | 0 | block, 4 * kBlockDim, dc, dc_stride, scratch_space); | 700 | 0 | break; | 701 | 0 | } | 702 | 0 | case Type::DCT8X32: { | 703 | 0 | ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim, | 704 | 0 | /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>( | 705 | 0 | block, 4 * kBlockDim, dc, dc_stride, scratch_space); | 706 | 0 | break; | 707 | 0 | } | 708 | 5.57k | case Type::DCT32X16: { | 709 | 5.57k | ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim, | 710 | 5.57k | /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>( | 711 | 5.57k | block, 4 * kBlockDim, dc, dc_stride, scratch_space); | 712 | 5.57k | break; | 713 | 0 | } | 714 | 6.93k | case Type::DCT16X32: { | 715 | 6.93k | ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, | 716 | 6.93k | /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>( | 717 | 6.93k | block, 4 * kBlockDim, dc, dc_stride, scratch_space); | 718 | 6.93k | break; | 719 | 0 | } | 720 | 12.3k | case Type::DCT32X32: { | 721 | 12.3k | ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, | 722 | 12.3k | /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>( | 723 | 12.3k | block, 4 * kBlockDim, dc, dc_stride, scratch_space); | 724 | 12.3k | break; | 725 | 0 | } | 726 | 162 | case Type::DCT64X32: { | 727 | 162 | ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, | 728 | 162 | /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>( | 729 | 162 | block, 8 * kBlockDim, dc, dc_stride, scratch_space); | 730 | 162 | break; | 731 | 0 | } | 732 | 123 | case Type::DCT32X64: { | 733 | 123 | ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, | 734 | 123 | /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>( | 735 | 123 | block, 8 * kBlockDim, dc, dc_stride, scratch_space); | 736 | 123 | break; | 737 | 0 | } | 
738 | 1.23k | case Type::DCT64X64: { | 739 | 1.23k | ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, | 740 | 1.23k | /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>( | 741 | 1.23k | block, 8 * kBlockDim, dc, dc_stride, scratch_space); | 742 | 1.23k | break; | 743 | 0 | } | 744 | 0 | case Type::DCT128X64: { | 745 | 0 | ReinterpretingIDCT< | 746 | 0 | /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, | 747 | 0 | /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>( | 748 | 0 | block, 16 * kBlockDim, dc, dc_stride, scratch_space); | 749 | 0 | break; | 750 | 0 | } | 751 | 0 | case Type::DCT64X128: { | 752 | 0 | ReinterpretingIDCT< | 753 | 0 | /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, | 754 | 0 | /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>( | 755 | 0 | block, 16 * kBlockDim, dc, dc_stride, scratch_space); | 756 | 0 | break; | 757 | 0 | } | 758 | 0 | case Type::DCT128X128: { | 759 | 0 | ReinterpretingIDCT< | 760 | 0 | /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, | 761 | 0 | /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>( | 762 | 0 | block, 16 * kBlockDim, dc, dc_stride, scratch_space); | 763 | 0 | break; | 764 | 0 | } | 765 | 0 | case Type::DCT256X128: { | 766 | 0 | ReinterpretingIDCT< | 767 | 0 | /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, | 768 | 0 | /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>( | 769 | 0 | block, 32 * kBlockDim, dc, dc_stride, scratch_space); | 770 | 0 | break; | 771 | 0 | } | 772 | 0 | case Type::DCT128X256: { | 773 | 0 | ReinterpretingIDCT< | 774 | 0 | /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, | 775 | 0 | /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>( | 776 | 0 | block, 32 * kBlockDim, dc, dc_stride, scratch_space); | 777 | 0 | break; | 778 | 0 | } | 779 | 0 | case Type::DCT256X256: { | 780 | 0 | ReinterpretingIDCT< | 781 | 0 | /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, | 782 | 0 | 
/*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>( | 783 | 0 | block, 32 * kBlockDim, dc, dc_stride, scratch_space); | 784 | 0 | break; | 785 | 0 | } | 786 | 187k | case Type::DCT: | 787 | 393k | case Type::DCT2X2: | 788 | 394k | case Type::DCT4X4: | 789 | 415k | case Type::DCT4X8: | 790 | 463k | case Type::DCT8X4: | 791 | 483k | case Type::AFV0: | 792 | 497k | case Type::AFV1: | 793 | 513k | case Type::AFV2: | 794 | 536k | case Type::AFV3: | 795 | 627k | case Type::IDENTITY: | 796 | 627k | dc[0] = block[0]; | 797 | 627k | break; | 798 | 725k | } | 799 | 725k | } |
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*) Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*) enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*) Line | Count | Source | 675 | 1.91M | float* scratch_space) { | 676 | 1.91M | using Type = AcStrategyType; | 677 | 1.91M | switch (strategy) { | 678 | 24.7k | case Type::DCT16X8: { | 679 | 24.7k | ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim, | 680 | 24.7k | /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>( | 681 | 24.7k | block, 2 * kBlockDim, dc, dc_stride, scratch_space); | 682 | 24.7k | break; | 683 | 0 | } | 684 | 27.7k | case Type::DCT8X16: { | 685 | 27.7k | ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim, | 686 | 27.7k | /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>( | 687 | 27.7k | block, 2 * kBlockDim, dc, dc_stride, scratch_space); | 688 | 27.7k | break; | 689 | 0 | } | 690 | 19.1k | case Type::DCT16X16: { | 691 | 19.1k | ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim, | 692 | 19.1k | /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>( | 693 | 19.1k | block, 2 * kBlockDim, dc, dc_stride, scratch_space); | 694 | 19.1k | break; | 695 | 0 | } | 696 | 0 | case Type::DCT32X8: { | 697 | 0 | ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim, | 698 | 0 | /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>( | 699 | 0 | block, 4 * kBlockDim, dc, dc_stride, scratch_space); | 700 | 0 | break; | 701 | 0 | } | 702 | 0 | case Type::DCT8X32: { | 703 | 0 | ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim, | 704 | 0 | /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, 
/*COLS=*/4>( | 705 | 0 | block, 4 * kBlockDim, dc, dc_stride, scratch_space); | 706 | 0 | break; | 707 | 0 | } | 708 | 5.57k | case Type::DCT32X16: { | 709 | 5.57k | ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim, | 710 | 5.57k | /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>( | 711 | 5.57k | block, 4 * kBlockDim, dc, dc_stride, scratch_space); | 712 | 5.57k | break; | 713 | 0 | } | 714 | 6.93k | case Type::DCT16X32: { | 715 | 6.93k | ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, | 716 | 6.93k | /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>( | 717 | 6.93k | block, 4 * kBlockDim, dc, dc_stride, scratch_space); | 718 | 6.93k | break; | 719 | 0 | } | 720 | 12.3k | case Type::DCT32X32: { | 721 | 12.3k | ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, | 722 | 12.3k | /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>( | 723 | 12.3k | block, 4 * kBlockDim, dc, dc_stride, scratch_space); | 724 | 12.3k | break; | 725 | 0 | } | 726 | 162 | case Type::DCT64X32: { | 727 | 162 | ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, | 728 | 162 | /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>( | 729 | 162 | block, 8 * kBlockDim, dc, dc_stride, scratch_space); | 730 | 162 | break; | 731 | 0 | } | 732 | 123 | case Type::DCT32X64: { | 733 | 123 | ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, | 734 | 123 | /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>( | 735 | 123 | block, 8 * kBlockDim, dc, dc_stride, scratch_space); | 736 | 123 | break; | 737 | 0 | } | 738 | 1.23k | case Type::DCT64X64: { | 739 | 1.23k | ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, | 740 | 1.23k | /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>( | 741 | 1.23k | block, 8 * kBlockDim, dc, dc_stride, scratch_space); | 742 | 1.23k | break; | 743 | 0 | } | 744 | 0 | case Type::DCT128X64: { | 745 | 0 | 
ReinterpretingIDCT< | 746 | 0 | /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, | 747 | 0 | /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>( | 748 | 0 | block, 16 * kBlockDim, dc, dc_stride, scratch_space); | 749 | 0 | break; | 750 | 0 | } | 751 | 0 | case Type::DCT64X128: { | 752 | 0 | ReinterpretingIDCT< | 753 | 0 | /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, | 754 | 0 | /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>( | 755 | 0 | block, 16 * kBlockDim, dc, dc_stride, scratch_space); | 756 | 0 | break; | 757 | 0 | } | 758 | 0 | case Type::DCT128X128: { | 759 | 0 | ReinterpretingIDCT< | 760 | 0 | /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, | 761 | 0 | /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>( | 762 | 0 | block, 16 * kBlockDim, dc, dc_stride, scratch_space); | 763 | 0 | break; | 764 | 0 | } | 765 | 0 | case Type::DCT256X128: { | 766 | 0 | ReinterpretingIDCT< | 767 | 0 | /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, | 768 | 0 | /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>( | 769 | 0 | block, 32 * kBlockDim, dc, dc_stride, scratch_space); | 770 | 0 | break; | 771 | 0 | } | 772 | 0 | case Type::DCT128X256: { | 773 | 0 | ReinterpretingIDCT< | 774 | 0 | /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, | 775 | 0 | /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>( | 776 | 0 | block, 32 * kBlockDim, dc, dc_stride, scratch_space); | 777 | 0 | break; | 778 | 0 | } | 779 | 0 | case Type::DCT256X256: { | 780 | 0 | ReinterpretingIDCT< | 781 | 0 | /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, | 782 | 0 | /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>( | 783 | 0 | block, 32 * kBlockDim, dc, dc_stride, scratch_space); | 784 | 0 | break; | 785 | 0 | } | 786 | 1.38M | case Type::DCT: | 787 | 1.58M | case Type::DCT2X2: | 788 | 1.58M | case Type::DCT4X4: | 789 | 1.61M | case Type::DCT4X8: | 790 | 1.65M | case Type::DCT8X4: | 791 | 1.67M | case 
Type::AFV0: | 792 | 1.69M | case Type::AFV1: | 793 | 1.70M | case Type::AFV2: | 794 | 1.73M | case Type::AFV3: | 795 | 1.82M | case Type::IDENTITY: | 796 | 1.82M | dc[0] = block[0]; | 797 | 1.82M | break; | 798 | 1.91M | } | 799 | 1.91M | } |
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*) Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*) |
800 | | |
801 | | } // namespace |
802 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
803 | | } // namespace HWY_NAMESPACE |
804 | | } // namespace jxl |
805 | | HWY_AFTER_NAMESPACE(); |
806 | | |
807 | | #endif // LIB_JXL_ENC_TRANSFORMS_INL_H_ |